In [1]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from datetime import datetime
import matplotlib.pyplot as plt

# Shows plots in jupyter notebook
%matplotlib inline

# Set plot style
sns.set(color_codes=True)

Load Data¶

In [3]:
df = pd.read_csv('C:/Users/Tasnim/Desktop/dataUsed2/data_after_eda.csv', index_col=0)
In [4]:
df.head(3)
Out[4]:
id channel_sales cons_12m cons_gas_12m cons_last_month date_activ date_end date_modif_prod date_renewal forecast_cons_12m ... mean_3m_price_p1_var mean_3m_price_p2_var mean_3m_price_p3_var mean_3m_price_p1_fix mean_3m_price_p2_fix mean_3m_price_p3_fix mean_3m_price_p1 mean_3m_price_p2 mean_3m_price_p3 churn
0 24011ae4ebbe3035111d65fa7c15bc57 foosdfpfkusacimwkcsosbicdxkicaua 0 54946 0 2013-06-15 2016-06-15 2015-11-01 2015-06-23 0.00 ... 0.131756 0.092638 0.036909 42.497907 12.218665 8.145777 42.629663 12.311304 8.182687 1
1 764c75f661154dac3a6c254cd082ea7d foosdfpfkusacimwkcsosbicdxkicaua 544 0 0 2010-04-16 2016-04-16 2010-04-16 2015-04-17 47.96 ... 0.167798 0.088409 0.000000 44.444710 0.000000 0.000000 44.612508 0.088409 0.000000 0
2 bba03439a292a1e166f80264c16191cb lmkebamcaaclubfxadlmueccxoimlema 1584 0 0 2010-03-30 2016-03-30 2010-03-30 2015-03-31 240.04 ... 0.148586 0.000000 0.000000 44.444710 0.000000 0.000000 44.593296 0.000000 0.000000 0

3 rows × 53 columns

In [5]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 10830 entries, 0 to 10829
Data columns (total 53 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              10830 non-null  object 
 1   channel_sales                   10830 non-null  object 
 2   cons_12m                        10830 non-null  int64  
 3   cons_gas_12m                    10830 non-null  int64  
 4   cons_last_month                 10830 non-null  int64  
 5   date_activ                      10830 non-null  object 
 6   date_end                        10830 non-null  object 
 7   date_modif_prod                 10830 non-null  object 
 8   date_renewal                    10830 non-null  object 
 9   forecast_cons_12m               10830 non-null  float64
 10  forecast_cons_year              10830 non-null  int64  
 11  forecast_discount_energy        10830 non-null  float64
 12  forecast_meter_rent_12m         10830 non-null  float64
 13  forecast_price_energy_off_peak  10830 non-null  float64
 14  forecast_price_energy_peak      10830 non-null  float64
 15  forecast_price_pow_off_peak     10830 non-null  float64
 16  has_gas                         10830 non-null  object 
 17  imp_cons                        10830 non-null  float64
 18  margin_gross_pow_ele            10830 non-null  float64
 19  margin_net_pow_ele              10830 non-null  float64
 20  nb_prod_act                     10830 non-null  int64  
 21  net_margin                      10830 non-null  float64
 22  num_years_antig                 10830 non-null  int64  
 23  origin_up                       10830 non-null  object 
 24  pow_max                         10830 non-null  float64
 25  mean_year_price_p1_var          10830 non-null  float64
 26  mean_year_price_p2_var          10830 non-null  float64
 27  mean_year_price_p3_var          10830 non-null  float64
 28  mean_year_price_p1_fix          10830 non-null  float64
 29  mean_year_price_p2_fix          10830 non-null  float64
 30  mean_year_price_p3_fix          10830 non-null  float64
 31  mean_year_price_p1              10830 non-null  float64
 32  mean_year_price_p2              10830 non-null  float64
 33  mean_year_price_p3              10830 non-null  float64
 34  mean_6m_price_p1_var            10830 non-null  float64
 35  mean_6m_price_p2_var            10830 non-null  float64
 36  mean_6m_price_p3_var            10830 non-null  float64
 37  mean_6m_price_p1_fix            10830 non-null  float64
 38  mean_6m_price_p2_fix            10830 non-null  float64
 39  mean_6m_price_p3_fix            10830 non-null  float64
 40  mean_6m_price_p1                10830 non-null  float64
 41  mean_6m_price_p2                10830 non-null  float64
 42  mean_6m_price_p3                10830 non-null  float64
 43  mean_3m_price_p1_var            10830 non-null  float64
 44  mean_3m_price_p2_var            10830 non-null  float64
 45  mean_3m_price_p3_var            10830 non-null  float64
 46  mean_3m_price_p1_fix            10830 non-null  float64
 47  mean_3m_price_p2_fix            10830 non-null  float64
 48  mean_3m_price_p3_fix            10830 non-null  float64
 49  mean_3m_price_p1                10830 non-null  float64
 50  mean_3m_price_p2                10830 non-null  float64
 51  mean_3m_price_p3                10830 non-null  float64
 52  churn                           10830 non-null  int64  
dtypes: float64(38), int64(7), object(8)
memory usage: 4.5+ MB
In [6]:
df["date_activ"] = pd.to_datetime(df["date_activ"], format='%Y-%m-%d')
df["date_end"] = pd.to_datetime(df["date_end"], format='%Y-%m-%d')
df["date_modif_prod"] = pd.to_datetime(df["date_modif_prod"], format='%Y-%m-%d')
df["date_renewal"] = pd.to_datetime(df["date_renewal"], format='%Y-%m-%d')
In [7]:
df.head(3)
Out[7]:
id channel_sales cons_12m cons_gas_12m cons_last_month date_activ date_end date_modif_prod date_renewal forecast_cons_12m ... mean_3m_price_p1_var mean_3m_price_p2_var mean_3m_price_p3_var mean_3m_price_p1_fix mean_3m_price_p2_fix mean_3m_price_p3_fix mean_3m_price_p1 mean_3m_price_p2 mean_3m_price_p3 churn
0 24011ae4ebbe3035111d65fa7c15bc57 foosdfpfkusacimwkcsosbicdxkicaua 0 54946 0 2013-06-15 2016-06-15 2015-11-01 2015-06-23 0.00 ... 0.131756 0.092638 0.036909 42.497907 12.218665 8.145777 42.629663 12.311304 8.182687 1
1 764c75f661154dac3a6c254cd082ea7d foosdfpfkusacimwkcsosbicdxkicaua 544 0 0 2010-04-16 2016-04-16 2010-04-16 2015-04-17 47.96 ... 0.167798 0.088409 0.000000 44.444710 0.000000 0.000000 44.612508 0.088409 0.000000 0
2 bba03439a292a1e166f80264c16191cb lmkebamcaaclubfxadlmueccxoimlema 1584 0 0 2010-03-30 2016-03-30 2010-03-30 2015-03-31 240.04 ... 0.148586 0.000000 0.000000 44.444710 0.000000 0.000000 44.593296 0.000000 0.000000 0

3 rows × 53 columns

Feature engineering¶

Difference between off-peak prices in December and preceding January¶

In [8]:
price_df = pd.read_csv('C:/Users/Tasnim/Desktop/dataUsed2//price_data.csv')
In [9]:
price_df["price_date"] = pd.to_datetime(price_df["price_date"], format='%Y-%m-%d')
price_df.head()
Out[9]:
id price_date price_off_peak_var price_peak_var price_mid_peak_var price_off_peak_fix price_peak_fix price_mid_peak_fix
0 038af19179925da21a25619c5a24b745 2015-01-01 0.151367 0.0 0.0 44.266931 0.0 0.0
1 038af19179925da21a25619c5a24b745 2015-02-01 0.151367 0.0 0.0 44.266931 0.0 0.0
2 038af19179925da21a25619c5a24b745 2015-03-01 0.151367 0.0 0.0 44.266931 0.0 0.0
3 038af19179925da21a25619c5a24b745 2015-04-01 0.149626 0.0 0.0 44.266931 0.0 0.0
4 038af19179925da21a25619c5a24b745 2015-05-01 0.149626 0.0 0.0 44.266931 0.0 0.0
In [10]:
price_df
Out[10]:
id price_date price_off_peak_var price_peak_var price_mid_peak_var price_off_peak_fix price_peak_fix price_mid_peak_fix
0 038af19179925da21a25619c5a24b745 2015-01-01 0.151367 0.000000 0.000000 44.266931 0.00000 0.000000
1 038af19179925da21a25619c5a24b745 2015-02-01 0.151367 0.000000 0.000000 44.266931 0.00000 0.000000
2 038af19179925da21a25619c5a24b745 2015-03-01 0.151367 0.000000 0.000000 44.266931 0.00000 0.000000
3 038af19179925da21a25619c5a24b745 2015-04-01 0.149626 0.000000 0.000000 44.266931 0.00000 0.000000
4 038af19179925da21a25619c5a24b745 2015-05-01 0.149626 0.000000 0.000000 44.266931 0.00000 0.000000
... ... ... ... ... ... ... ... ...
192997 16f51cdc2baa19af0b940ee1b3dd17d5 2015-08-01 0.119916 0.102232 0.076257 40.728885 24.43733 16.291555
192998 16f51cdc2baa19af0b940ee1b3dd17d5 2015-09-01 0.119916 0.102232 0.076257 40.728885 24.43733 16.291555
192999 16f51cdc2baa19af0b940ee1b3dd17d5 2015-10-01 0.119916 0.102232 0.076257 40.728885 24.43733 16.291555
193000 16f51cdc2baa19af0b940ee1b3dd17d5 2015-11-01 0.119916 0.102232 0.076257 40.728885 24.43733 16.291555
193001 16f51cdc2baa19af0b940ee1b3dd17d5 2015-12-01 0.119916 0.102232 0.076257 40.728885 24.43733 16.291555

193002 rows × 8 columns

In [11]:
# Group off-peak prices by companies and month
monthly_price_by_id = price_df.groupby(['id', 'price_date']).agg({'price_off_peak_var': 'mean', 'price_off_peak_fix': 'mean'}).reset_index()
In [12]:
monthly_price_by_id
Out[12]:
id price_date price_off_peak_var price_off_peak_fix
0 0002203ffbb812588b632b9e628cc38d 2015-01-01 0.126098 40.565969
1 0002203ffbb812588b632b9e628cc38d 2015-02-01 0.126098 40.565969
2 0002203ffbb812588b632b9e628cc38d 2015-03-01 0.128067 40.728885
3 0002203ffbb812588b632b9e628cc38d 2015-04-01 0.128067 40.728885
4 0002203ffbb812588b632b9e628cc38d 2015-05-01 0.128067 40.728885
... ... ... ... ...
192997 ffff7fa066f1fb305ae285bb03bf325a 2015-08-01 0.119916 40.728885
192998 ffff7fa066f1fb305ae285bb03bf325a 2015-09-01 0.119916 40.728885
192999 ffff7fa066f1fb305ae285bb03bf325a 2015-10-01 0.119916 40.728885
193000 ffff7fa066f1fb305ae285bb03bf325a 2015-11-01 0.119916 40.728885
193001 ffff7fa066f1fb305ae285bb03bf325a 2015-12-01 0.119916 40.728885

193002 rows × 4 columns

In [13]:
jan_prices = monthly_price_by_id.groupby('id').first().reset_index()
dec_prices = monthly_price_by_id.groupby('id').last().reset_index()
In [14]:
jan_prices 
Out[14]:
id price_date price_off_peak_var price_off_peak_fix
0 0002203ffbb812588b632b9e628cc38d 2015-01-01 0.126098 40.565969
1 0004351ebdd665e6ee664792efc4fd13 2015-01-01 0.148047 44.266931
2 0010bcc39e42b3c2131ed2ce55246e3c 2015-01-01 0.150837 44.444710
3 0010ee3855fdea87602a5b7aba8e42de 2015-01-01 0.123086 40.565969
4 00114d74e963e47177db89bc70108537 2015-01-01 0.149434 44.266931
... ... ... ... ...
16091 ffef185810e44254c3a4c6395e6b4d8a 2015-01-01 0.162720 41.063970
16092 fffac626da707b1b5ab11e8431a4d0a2 2015-01-01 0.148825 44.266931
16093 fffc0cacd305dd51f316424bbb08d1bd 2015-01-01 0.153159 41.063970
16094 fffe4f5646aa39c7f97f95ae2679ce64 2015-01-01 0.127566 40.565969
16095 ffff7fa066f1fb305ae285bb03bf325a 2015-01-01 0.129444 40.565969

16096 rows × 4 columns

In [15]:
dec_prices
Out[15]:
id price_date price_off_peak_var price_off_peak_fix
0 0002203ffbb812588b632b9e628cc38d 2015-12-01 0.119906 40.728885
1 0004351ebdd665e6ee664792efc4fd13 2015-12-01 0.143943 44.444710
2 0010bcc39e42b3c2131ed2ce55246e3c 2015-12-01 0.201280 45.944710
3 0010ee3855fdea87602a5b7aba8e42de 2015-12-01 0.113068 40.728885
4 00114d74e963e47177db89bc70108537 2015-12-01 0.145440 44.266930
... ... ... ... ...
16091 ffef185810e44254c3a4c6395e6b4d8a 2015-12-01 0.112488 40.728885
16092 fffac626da707b1b5ab11e8431a4d0a2 2015-12-01 0.145047 44.444710
16093 fffc0cacd305dd51f316424bbb08d1bd 2015-12-01 0.151399 41.228885
16094 fffe4f5646aa39c7f97f95ae2679ce64 2015-12-01 0.118175 40.728885
16095 ffff7fa066f1fb305ae285bb03bf325a 2015-12-01 0.119916 40.728885

16096 rows × 4 columns

In [16]:
diff=pd.merge(dec_prices.rename(columns={'price_off_peak_var': 'dec_1', 'price_off_peak_fix': 'dec_2'}), jan_prices.drop(columns='price_date'), on='id')
In [17]:
diff
Out[17]:
id price_date dec_1 dec_2 price_off_peak_var price_off_peak_fix
0 0002203ffbb812588b632b9e628cc38d 2015-12-01 0.119906 40.728885 0.126098 40.565969
1 0004351ebdd665e6ee664792efc4fd13 2015-12-01 0.143943 44.444710 0.148047 44.266931
2 0010bcc39e42b3c2131ed2ce55246e3c 2015-12-01 0.201280 45.944710 0.150837 44.444710
3 0010ee3855fdea87602a5b7aba8e42de 2015-12-01 0.113068 40.728885 0.123086 40.565969
4 00114d74e963e47177db89bc70108537 2015-12-01 0.145440 44.266930 0.149434 44.266931
... ... ... ... ... ... ...
16091 ffef185810e44254c3a4c6395e6b4d8a 2015-12-01 0.112488 40.728885 0.162720 41.063970
16092 fffac626da707b1b5ab11e8431a4d0a2 2015-12-01 0.145047 44.444710 0.148825 44.266931
16093 fffc0cacd305dd51f316424bbb08d1bd 2015-12-01 0.151399 41.228885 0.153159 41.063970
16094 fffe4f5646aa39c7f97f95ae2679ce64 2015-12-01 0.118175 40.728885 0.127566 40.565969
16095 ffff7fa066f1fb305ae285bb03bf325a 2015-12-01 0.119916 40.728885 0.129444 40.565969

16096 rows × 6 columns

In [18]:
# Calculate the difference
diff['offpeak_diff_dec_january_energy'] = diff['dec_1'] - diff['price_off_peak_var']
diff['offpeak_diff_dec_january_power'] = diff['dec_2'] - diff['price_off_peak_fix']
In [19]:
diff = diff[['id', 'offpeak_diff_dec_january_energy','offpeak_diff_dec_january_power']]
diff.head(2) 
Out[19]:
id offpeak_diff_dec_january_energy offpeak_diff_dec_january_power
0 0002203ffbb812588b632b9e628cc38d -0.006192 0.162916
1 0004351ebdd665e6ee664792efc4fd13 -0.004104 0.177779
In [20]:
diff.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16096 entries, 0 to 16095
Data columns (total 3 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   id                               16096 non-null  object 
 1   offpeak_diff_dec_january_energy  16096 non-null  float64
 2   offpeak_diff_dec_january_power   16096 non-null  float64
dtypes: float64(2), object(1)
memory usage: 377.4+ KB
In [21]:
df = pd.merge(df, diff, on='id')
df.head()
Out[21]:
id channel_sales cons_12m cons_gas_12m cons_last_month date_activ date_end date_modif_prod date_renewal forecast_cons_12m ... mean_3m_price_p3_var mean_3m_price_p1_fix mean_3m_price_p2_fix mean_3m_price_p3_fix mean_3m_price_p1 mean_3m_price_p2 mean_3m_price_p3 churn offpeak_diff_dec_january_energy offpeak_diff_dec_january_power
0 24011ae4ebbe3035111d65fa7c15bc57 foosdfpfkusacimwkcsosbicdxkicaua 0 54946 0 2013-06-15 2016-06-15 2015-11-01 2015-06-23 0.00 ... 0.036909 42.497907 12.218665 8.145777 42.629663 12.311304 8.182687 1 0.020057 3.700961
1 764c75f661154dac3a6c254cd082ea7d foosdfpfkusacimwkcsosbicdxkicaua 544 0 0 2010-04-16 2016-04-16 2010-04-16 2015-04-17 47.96 ... 0.000000 44.444710 0.000000 0.000000 44.612508 0.088409 0.000000 0 -0.004670 0.177779
2 bba03439a292a1e166f80264c16191cb lmkebamcaaclubfxadlmueccxoimlema 1584 0 0 2010-03-30 2016-03-30 2010-03-30 2015-03-31 240.04 ... 0.000000 44.444710 0.000000 0.000000 44.593296 0.000000 0.000000 0 -0.004547 0.177779
3 1aa498825382410b098937d65c4ec26d usilxuppasemubllopkaafesmlibmsdf 8302 0 1998 2011-12-09 2016-12-09 2015-11-01 2015-12-10 796.94 ... 0.000000 44.266930 0.000000 0.000000 44.430589 0.084587 0.000000 1 -0.008087 -0.000001
4 7ab4bf4878d8f7661dfc20e9b8e18011 foosdfpfkusacimwkcsosbicdxkicaua 45097 0 0 2011-12-02 2016-12-02 2011-12-02 2015-12-03 8069.28 ... 0.000000 44.266930 0.000000 0.000000 44.430291 0.084305 0.000000 1 -0.004628 0.000000

5 rows × 55 columns

In [22]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10830 entries, 0 to 10829
Data columns (total 55 columns):
 #   Column                           Non-Null Count  Dtype         
---  ------                           --------------  -----         
 0   id                               10830 non-null  object        
 1   channel_sales                    10830 non-null  object        
 2   cons_12m                         10830 non-null  int64         
 3   cons_gas_12m                     10830 non-null  int64         
 4   cons_last_month                  10830 non-null  int64         
 5   date_activ                       10830 non-null  datetime64[ns]
 6   date_end                         10830 non-null  datetime64[ns]
 7   date_modif_prod                  10830 non-null  datetime64[ns]
 8   date_renewal                     10830 non-null  datetime64[ns]
 9   forecast_cons_12m                10830 non-null  float64       
 10  forecast_cons_year               10830 non-null  int64         
 11  forecast_discount_energy         10830 non-null  float64       
 12  forecast_meter_rent_12m          10830 non-null  float64       
 13  forecast_price_energy_off_peak   10830 non-null  float64       
 14  forecast_price_energy_peak       10830 non-null  float64       
 15  forecast_price_pow_off_peak      10830 non-null  float64       
 16  has_gas                          10830 non-null  object        
 17  imp_cons                         10830 non-null  float64       
 18  margin_gross_pow_ele             10830 non-null  float64       
 19  margin_net_pow_ele               10830 non-null  float64       
 20  nb_prod_act                      10830 non-null  int64         
 21  net_margin                       10830 non-null  float64       
 22  num_years_antig                  10830 non-null  int64         
 23  origin_up                        10830 non-null  object        
 24  pow_max                          10830 non-null  float64       
 25  mean_year_price_p1_var           10830 non-null  float64       
 26  mean_year_price_p2_var           10830 non-null  float64       
 27  mean_year_price_p3_var           10830 non-null  float64       
 28  mean_year_price_p1_fix           10830 non-null  float64       
 29  mean_year_price_p2_fix           10830 non-null  float64       
 30  mean_year_price_p3_fix           10830 non-null  float64       
 31  mean_year_price_p1               10830 non-null  float64       
 32  mean_year_price_p2               10830 non-null  float64       
 33  mean_year_price_p3               10830 non-null  float64       
 34  mean_6m_price_p1_var             10830 non-null  float64       
 35  mean_6m_price_p2_var             10830 non-null  float64       
 36  mean_6m_price_p3_var             10830 non-null  float64       
 37  mean_6m_price_p1_fix             10830 non-null  float64       
 38  mean_6m_price_p2_fix             10830 non-null  float64       
 39  mean_6m_price_p3_fix             10830 non-null  float64       
 40  mean_6m_price_p1                 10830 non-null  float64       
 41  mean_6m_price_p2                 10830 non-null  float64       
 42  mean_6m_price_p3                 10830 non-null  float64       
 43  mean_3m_price_p1_var             10830 non-null  float64       
 44  mean_3m_price_p2_var             10830 non-null  float64       
 45  mean_3m_price_p3_var             10830 non-null  float64       
 46  mean_3m_price_p1_fix             10830 non-null  float64       
 47  mean_3m_price_p2_fix             10830 non-null  float64       
 48  mean_3m_price_p3_fix             10830 non-null  float64       
 49  mean_3m_price_p1                 10830 non-null  float64       
 50  mean_3m_price_p2                 10830 non-null  float64       
 51  mean_3m_price_p3                 10830 non-null  float64       
 52  churn                            10830 non-null  int64         
 53  offpeak_diff_dec_january_energy  10830 non-null  float64       
 54  offpeak_diff_dec_january_power   10830 non-null  float64       
dtypes: datetime64[ns](4), float64(40), int64(7), object(4)
memory usage: 4.5+ MB

Average price changes across periods¶

In [23]:
# Aggregate average prices per period by company
mean_prices = price_df.groupby(['id']).agg({
    'price_off_peak_var': 'mean', 
    'price_peak_var': 'mean', 
    'price_mid_peak_var': 'mean',
    'price_off_peak_fix': 'mean',
    'price_peak_fix': 'mean',
    'price_mid_peak_fix': 'mean'    
}).reset_index()
In [24]:
mean_prices
Out[24]:
id price_off_peak_var price_peak_var price_mid_peak_var price_off_peak_fix price_peak_fix price_mid_peak_fix
0 0002203ffbb812588b632b9e628cc38d 0.124338 0.103794 0.073160 40.701732 24.421038 16.280694
1 0004351ebdd665e6ee664792efc4fd13 0.146426 0.000000 0.000000 44.385450 0.000000 0.000000
2 0010bcc39e42b3c2131ed2ce55246e3c 0.181558 0.000000 0.000000 45.319710 0.000000 0.000000
3 0010ee3855fdea87602a5b7aba8e42de 0.118757 0.098292 0.069032 40.647427 24.388455 16.258971
4 00114d74e963e47177db89bc70108537 0.147926 0.000000 0.000000 44.266930 0.000000 0.000000
... ... ... ... ... ... ... ...
16091 ffef185810e44254c3a4c6395e6b4d8a 0.138863 0.115125 0.080780 40.896427 24.637456 16.507972
16092 fffac626da707b1b5ab11e8431a4d0a2 0.147137 0.000000 0.000000 44.311375 0.000000 0.000000
16093 fffc0cacd305dd51f316424bbb08d1bd 0.153879 0.129497 0.094842 41.160171 24.895768 16.763569
16094 fffe4f5646aa39c7f97f95ae2679ce64 0.123858 0.103499 0.073735 40.606699 24.364017 16.242678
16095 ffff7fa066f1fb305ae285bb03bf325a 0.125360 0.104895 0.075635 40.647427 24.388455 16.258971

16096 rows × 7 columns

In [25]:
#mean_prices.to_csv('C:/Users/Tasnim/Desktop/dataUsed2/AVERAGEpRICE.csv')
In [26]:
# Calculate the mean difference between consecutive periods
mean_prices['off_peak_peak_var_mean_diff'] = mean_prices['price_off_peak_var'] - mean_prices['price_peak_var']
mean_prices['peak_mid_peak_var_mean_diff'] = mean_prices['price_peak_var'] - mean_prices['price_mid_peak_var']
mean_prices['off_peak_mid_peak_var_mean_diff'] = mean_prices['price_off_peak_var'] - mean_prices['price_mid_peak_var']
mean_prices['off_peak_peak_fix_mean_diff'] = mean_prices['price_off_peak_fix'] - mean_prices['price_peak_fix']
mean_prices['peak_mid_peak_fix_mean_diff'] = mean_prices['price_peak_fix'] - mean_prices['price_mid_peak_fix']
mean_prices['off_peak_mid_peak_fix_mean_diff'] = mean_prices['price_off_peak_fix'] - mean_prices['price_mid_peak_fix']
In [27]:
mean_prices
Out[27]:
id price_off_peak_var price_peak_var price_mid_peak_var price_off_peak_fix price_peak_fix price_mid_peak_fix off_peak_peak_var_mean_diff peak_mid_peak_var_mean_diff off_peak_mid_peak_var_mean_diff off_peak_peak_fix_mean_diff peak_mid_peak_fix_mean_diff off_peak_mid_peak_fix_mean_diff
0 0002203ffbb812588b632b9e628cc38d 0.124338 0.103794 0.073160 40.701732 24.421038 16.280694 0.020545 0.030633 0.051178 16.280694 8.140345 24.421038
1 0004351ebdd665e6ee664792efc4fd13 0.146426 0.000000 0.000000 44.385450 0.000000 0.000000 0.146426 0.000000 0.146426 44.385450 0.000000 44.385450
2 0010bcc39e42b3c2131ed2ce55246e3c 0.181558 0.000000 0.000000 45.319710 0.000000 0.000000 0.181558 0.000000 0.181558 45.319710 0.000000 45.319710
3 0010ee3855fdea87602a5b7aba8e42de 0.118757 0.098292 0.069032 40.647427 24.388455 16.258971 0.020465 0.029260 0.049725 16.258972 8.129484 24.388456
4 00114d74e963e47177db89bc70108537 0.147926 0.000000 0.000000 44.266930 0.000000 0.000000 0.147926 0.000000 0.147926 44.266930 0.000000 44.266930
... ... ... ... ... ... ... ... ... ... ... ... ... ...
16091 ffef185810e44254c3a4c6395e6b4d8a 0.138863 0.115125 0.080780 40.896427 24.637456 16.507972 0.023739 0.034344 0.058083 16.258971 8.129484 24.388455
16092 fffac626da707b1b5ab11e8431a4d0a2 0.147137 0.000000 0.000000 44.311375 0.000000 0.000000 0.147137 0.000000 0.147137 44.311375 0.000000 44.311375
16093 fffc0cacd305dd51f316424bbb08d1bd 0.153879 0.129497 0.094842 41.160171 24.895768 16.763569 0.024382 0.034655 0.059037 16.264402 8.132199 24.396601
16094 fffe4f5646aa39c7f97f95ae2679ce64 0.123858 0.103499 0.073735 40.606699 24.364017 16.242678 0.020359 0.029764 0.050123 16.242682 8.121339 24.364021
16095 ffff7fa066f1fb305ae285bb03bf325a 0.125360 0.104895 0.075635 40.647427 24.388455 16.258971 0.020465 0.029260 0.049725 16.258972 8.129484 24.388456

16096 rows × 13 columns

In [28]:
columns = [
    'id', 
    'off_peak_peak_var_mean_diff',
    'peak_mid_peak_var_mean_diff', 
    'off_peak_mid_peak_var_mean_diff',
    'off_peak_peak_fix_mean_diff', 
    'peak_mid_peak_fix_mean_diff', 
    'off_peak_mid_peak_fix_mean_diff'
]
df = pd.merge(df, mean_prices[columns], on='id')
In [29]:
df.head()
Out[29]:
id channel_sales cons_12m cons_gas_12m cons_last_month date_activ date_end date_modif_prod date_renewal forecast_cons_12m ... mean_3m_price_p3 churn offpeak_diff_dec_january_energy offpeak_diff_dec_january_power off_peak_peak_var_mean_diff peak_mid_peak_var_mean_diff off_peak_mid_peak_var_mean_diff off_peak_peak_fix_mean_diff peak_mid_peak_fix_mean_diff off_peak_mid_peak_fix_mean_diff
0 24011ae4ebbe3035111d65fa7c15bc57 foosdfpfkusacimwkcsosbicdxkicaua 0 54946 0 2013-06-15 2016-06-15 2015-11-01 2015-06-23 0.00 ... 8.182687 1 0.020057 3.700961 0.024038 0.034219 0.058257 18.590255 7.45067 26.040925
1 764c75f661154dac3a6c254cd082ea7d foosdfpfkusacimwkcsosbicdxkicaua 544 0 0 2010-04-16 2016-04-16 2010-04-16 2015-04-17 47.96 ... 0.000000 0 -0.004670 0.177779 0.082090 0.088421 0.170512 44.385450 0.00000 44.385450
2 bba03439a292a1e166f80264c16191cb lmkebamcaaclubfxadlmueccxoimlema 1584 0 0 2010-03-30 2016-03-30 2010-03-30 2015-03-31 240.04 ... 0.000000 0 -0.004547 0.177779 0.151210 0.000000 0.151210 44.400265 0.00000 44.400265
3 1aa498825382410b098937d65c4ec26d usilxuppasemubllopkaafesmlibmsdf 8302 0 1998 2011-12-09 2016-12-09 2015-11-01 2015-12-10 796.94 ... 0.000000 1 -0.008087 -0.000001 0.081320 0.087632 0.168953 44.266930 0.00000 44.266930
4 7ab4bf4878d8f7661dfc20e9b8e18011 foosdfpfkusacimwkcsosbicdxkicaua 45097 0 0 2011-12-02 2016-12-02 2011-12-02 2015-12-03 8069.28 ... 0.000000 1 -0.004628 0.000000 0.081316 0.084744 0.166061 44.266930 0.00000 44.266930

5 rows × 61 columns

In [30]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10830 entries, 0 to 10829
Data columns (total 61 columns):
 #   Column                           Non-Null Count  Dtype         
---  ------                           --------------  -----         
 0   id                               10830 non-null  object        
 1   channel_sales                    10830 non-null  object        
 2   cons_12m                         10830 non-null  int64         
 3   cons_gas_12m                     10830 non-null  int64         
 4   cons_last_month                  10830 non-null  int64         
 5   date_activ                       10830 non-null  datetime64[ns]
 6   date_end                         10830 non-null  datetime64[ns]
 7   date_modif_prod                  10830 non-null  datetime64[ns]
 8   date_renewal                     10830 non-null  datetime64[ns]
 9   forecast_cons_12m                10830 non-null  float64       
 10  forecast_cons_year               10830 non-null  int64         
 11  forecast_discount_energy         10830 non-null  float64       
 12  forecast_meter_rent_12m          10830 non-null  float64       
 13  forecast_price_energy_off_peak   10830 non-null  float64       
 14  forecast_price_energy_peak       10830 non-null  float64       
 15  forecast_price_pow_off_peak      10830 non-null  float64       
 16  has_gas                          10830 non-null  object        
 17  imp_cons                         10830 non-null  float64       
 18  margin_gross_pow_ele             10830 non-null  float64       
 19  margin_net_pow_ele               10830 non-null  float64       
 20  nb_prod_act                      10830 non-null  int64         
 21  net_margin                       10830 non-null  float64       
 22  num_years_antig                  10830 non-null  int64         
 23  origin_up                        10830 non-null  object        
 24  pow_max                          10830 non-null  float64       
 25  mean_year_price_p1_var           10830 non-null  float64       
 26  mean_year_price_p2_var           10830 non-null  float64       
 27  mean_year_price_p3_var           10830 non-null  float64       
 28  mean_year_price_p1_fix           10830 non-null  float64       
 29  mean_year_price_p2_fix           10830 non-null  float64       
 30  mean_year_price_p3_fix           10830 non-null  float64       
 31  mean_year_price_p1               10830 non-null  float64       
 32  mean_year_price_p2               10830 non-null  float64       
 33  mean_year_price_p3               10830 non-null  float64       
 34  mean_6m_price_p1_var             10830 non-null  float64       
 35  mean_6m_price_p2_var             10830 non-null  float64       
 36  mean_6m_price_p3_var             10830 non-null  float64       
 37  mean_6m_price_p1_fix             10830 non-null  float64       
 38  mean_6m_price_p2_fix             10830 non-null  float64       
 39  mean_6m_price_p3_fix             10830 non-null  float64       
 40  mean_6m_price_p1                 10830 non-null  float64       
 41  mean_6m_price_p2                 10830 non-null  float64       
 42  mean_6m_price_p3                 10830 non-null  float64       
 43  mean_3m_price_p1_var             10830 non-null  float64       
 44  mean_3m_price_p2_var             10830 non-null  float64       
 45  mean_3m_price_p3_var             10830 non-null  float64       
 46  mean_3m_price_p1_fix             10830 non-null  float64       
 47  mean_3m_price_p2_fix             10830 non-null  float64       
 48  mean_3m_price_p3_fix             10830 non-null  float64       
 49  mean_3m_price_p1                 10830 non-null  float64       
 50  mean_3m_price_p2                 10830 non-null  float64       
 51  mean_3m_price_p3                 10830 non-null  float64       
 52  churn                            10830 non-null  int64         
 53  offpeak_diff_dec_january_energy  10830 non-null  float64       
 54  offpeak_diff_dec_january_power   10830 non-null  float64       
 55  off_peak_peak_var_mean_diff      10830 non-null  float64       
 56  peak_mid_peak_var_mean_diff      10830 non-null  float64       
 57  off_peak_mid_peak_var_mean_diff  10830 non-null  float64       
 58  off_peak_peak_fix_mean_diff      10830 non-null  float64       
 59  peak_mid_peak_fix_mean_diff      10830 non-null  float64       
 60  off_peak_mid_peak_fix_mean_diff  10830 non-null  float64       
dtypes: datetime64[ns](4), float64(46), int64(7), object(4)
memory usage: 5.0+ MB

Max price changes across periods and months¶

In [31]:
# Aggregate average prices per period by company
mean_prices_by_month = price_df.groupby(['id', 'price_date']).agg({
    'price_off_peak_var': 'mean', 
    'price_peak_var': 'mean', 
    'price_mid_peak_var': 'mean',
    'price_off_peak_fix': 'mean',
    'price_peak_fix': 'mean',
    'price_mid_peak_fix': 'mean'    
}).reset_index()
In [32]:
mean_prices_by_month
Out[32]:
id price_date price_off_peak_var price_peak_var price_mid_peak_var price_off_peak_fix price_peak_fix price_mid_peak_fix
0 0002203ffbb812588b632b9e628cc38d 2015-01-01 0.126098 0.103975 0.070232 40.565969 24.339581 16.226389
1 0002203ffbb812588b632b9e628cc38d 2015-02-01 0.126098 0.103975 0.070232 40.565969 24.339581 16.226389
2 0002203ffbb812588b632b9e628cc38d 2015-03-01 0.128067 0.105842 0.073773 40.728885 24.437330 16.291555
3 0002203ffbb812588b632b9e628cc38d 2015-04-01 0.128067 0.105842 0.073773 40.728885 24.437330 16.291555
4 0002203ffbb812588b632b9e628cc38d 2015-05-01 0.128067 0.105842 0.073773 40.728885 24.437330 16.291555
... ... ... ... ... ... ... ... ...
192997 ffff7fa066f1fb305ae285bb03bf325a 2015-08-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555
192998 ffff7fa066f1fb305ae285bb03bf325a 2015-09-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555
192999 ffff7fa066f1fb305ae285bb03bf325a 2015-10-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555
193000 ffff7fa066f1fb305ae285bb03bf325a 2015-11-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555
193001 ffff7fa066f1fb305ae285bb03bf325a 2015-12-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555

193002 rows × 8 columns

In [33]:
# Calculate the mean difference between consecutive periods
mean_prices_by_month['off_peak_peak_var_mean_diff'] = mean_prices_by_month['price_off_peak_var'] - mean_prices_by_month['price_peak_var']
mean_prices_by_month['peak_mid_peak_var_mean_diff'] = mean_prices_by_month['price_peak_var'] - mean_prices_by_month['price_mid_peak_var']
mean_prices_by_month['off_peak_mid_peak_var_mean_diff'] = mean_prices_by_month['price_off_peak_var'] - mean_prices_by_month['price_mid_peak_var']
mean_prices_by_month['off_peak_peak_fix_mean_diff'] = mean_prices_by_month['price_off_peak_fix'] - mean_prices_by_month['price_peak_fix']
mean_prices_by_month['peak_mid_peak_fix_mean_diff'] = mean_prices_by_month['price_peak_fix'] - mean_prices_by_month['price_mid_peak_fix']
mean_prices_by_month['off_peak_mid_peak_fix_mean_diff'] = mean_prices_by_month['price_off_peak_fix'] - mean_prices_by_month['price_mid_peak_fix']
In [34]:
mean_prices_by_month
Out[34]:
id price_date price_off_peak_var price_peak_var price_mid_peak_var price_off_peak_fix price_peak_fix price_mid_peak_fix off_peak_peak_var_mean_diff peak_mid_peak_var_mean_diff off_peak_mid_peak_var_mean_diff off_peak_peak_fix_mean_diff peak_mid_peak_fix_mean_diff off_peak_mid_peak_fix_mean_diff
0 0002203ffbb812588b632b9e628cc38d 2015-01-01 0.126098 0.103975 0.070232 40.565969 24.339581 16.226389 0.022123 0.033743 0.055866 16.226389 8.113192 24.339581
1 0002203ffbb812588b632b9e628cc38d 2015-02-01 0.126098 0.103975 0.070232 40.565969 24.339581 16.226389 0.022123 0.033743 0.055866 16.226389 8.113192 24.339581
2 0002203ffbb812588b632b9e628cc38d 2015-03-01 0.128067 0.105842 0.073773 40.728885 24.437330 16.291555 0.022225 0.032069 0.054294 16.291555 8.145775 24.437330
3 0002203ffbb812588b632b9e628cc38d 2015-04-01 0.128067 0.105842 0.073773 40.728885 24.437330 16.291555 0.022225 0.032069 0.054294 16.291555 8.145775 24.437330
4 0002203ffbb812588b632b9e628cc38d 2015-05-01 0.128067 0.105842 0.073773 40.728885 24.437330 16.291555 0.022225 0.032069 0.054294 16.291555 8.145775 24.437330
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
192997 ffff7fa066f1fb305ae285bb03bf325a 2015-08-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555 0.017684 0.025975 0.043659 16.291555 8.145775 24.437330
192998 ffff7fa066f1fb305ae285bb03bf325a 2015-09-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555 0.017684 0.025975 0.043659 16.291555 8.145775 24.437330
192999 ffff7fa066f1fb305ae285bb03bf325a 2015-10-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555 0.017684 0.025975 0.043659 16.291555 8.145775 24.437330
193000 ffff7fa066f1fb305ae285bb03bf325a 2015-11-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555 0.017684 0.025975 0.043659 16.291555 8.145775 24.437330
193001 ffff7fa066f1fb305ae285bb03bf325a 2015-12-01 0.119916 0.102232 0.076257 40.728885 24.437330 16.291555 0.017684 0.025975 0.043659 16.291555 8.145775 24.437330

193002 rows × 14 columns

In [35]:
# Calculate the maximum monthly difference across time periods
max_diff_across_periods_months = mean_prices_by_month.groupby(['id']).agg({
    'off_peak_peak_var_mean_diff': 'max',
    'peak_mid_peak_var_mean_diff': 'max',
    'off_peak_mid_peak_var_mean_diff': 'max',
    'off_peak_peak_fix_mean_diff': 'max',
    'peak_mid_peak_fix_mean_diff': 'max',
    'off_peak_mid_peak_fix_mean_diff': 'max'
}).reset_index().rename(
    columns={
        'off_peak_peak_var_mean_diff': 'off_peak_peak_var_max_monthly_diff',
        'peak_mid_peak_var_mean_diff': 'peak_mid_peak_var_max_monthly_diff',
        'off_peak_mid_peak_var_mean_diff': 'off_peak_mid_peak_var_max_monthly_diff',
        'off_peak_peak_fix_mean_diff': 'off_peak_peak_fix_max_monthly_diff',
        'peak_mid_peak_fix_mean_diff': 'peak_mid_peak_fix_max_monthly_diff',
        'off_peak_mid_peak_fix_mean_diff': 'off_peak_mid_peak_fix_max_monthly_diff'
    }
)
In [36]:
max_diff_across_periods_months
Out[36]:
id off_peak_peak_var_max_monthly_diff peak_mid_peak_var_max_monthly_diff off_peak_mid_peak_var_max_monthly_diff off_peak_peak_fix_max_monthly_diff peak_mid_peak_fix_max_monthly_diff off_peak_mid_peak_fix_max_monthly_diff
0 0002203ffbb812588b632b9e628cc38d 0.022225 0.033743 0.055866 16.291555 8.145775 24.437330
1 0004351ebdd665e6ee664792efc4fd13 0.148405 0.000000 0.148405 44.444710 0.000000 44.444710
2 0010bcc39e42b3c2131ed2ce55246e3c 0.205742 0.000000 0.205742 45.944710 0.000000 45.944710
3 0010ee3855fdea87602a5b7aba8e42de 0.022581 0.031859 0.054440 16.291555 8.145775 24.437330
4 00114d74e963e47177db89bc70108537 0.149902 0.000000 0.149902 44.266931 0.000000 44.266931
... ... ... ... ... ... ... ...
16091 ffef185810e44254c3a4c6395e6b4d8a 0.029128 0.042028 0.071156 16.291555 8.145775 24.437330
16092 fffac626da707b1b5ab11e8431a4d0a2 0.148825 0.000000 0.148825 44.444710 0.000000 44.444710
16093 fffc0cacd305dd51f316424bbb08d1bd 0.028520 0.039592 0.068112 16.291555 8.145775 24.437330
16094 fffe4f5646aa39c7f97f95ae2679ce64 0.022138 0.031941 0.054079 16.291555 8.145775 24.437330
16095 ffff7fa066f1fb305ae285bb03bf325a 0.022581 0.031859 0.054440 16.291555 8.145775 24.437330

16096 rows × 7 columns

In [37]:
columns = [
    'id',
    'off_peak_peak_var_max_monthly_diff',
    'peak_mid_peak_var_max_monthly_diff',
    'off_peak_mid_peak_var_max_monthly_diff',
    'off_peak_peak_fix_max_monthly_diff',
    'peak_mid_peak_fix_max_monthly_diff',
    'off_peak_mid_peak_fix_max_monthly_diff'
]

df = pd.merge(df, max_diff_across_periods_months[columns], on='id')
df.head()
Out[37]:
id channel_sales cons_12m cons_gas_12m cons_last_month date_activ date_end date_modif_prod date_renewal forecast_cons_12m ... off_peak_mid_peak_var_mean_diff off_peak_peak_fix_mean_diff peak_mid_peak_fix_mean_diff off_peak_mid_peak_fix_mean_diff off_peak_peak_var_max_monthly_diff peak_mid_peak_var_max_monthly_diff off_peak_mid_peak_var_max_monthly_diff off_peak_peak_fix_max_monthly_diff peak_mid_peak_fix_max_monthly_diff off_peak_mid_peak_fix_max_monthly_diff
0 24011ae4ebbe3035111d65fa7c15bc57 foosdfpfkusacimwkcsosbicdxkicaua 0 54946 0 2013-06-15 2016-06-15 2015-11-01 2015-06-23 0.00 ... 0.058257 18.590255 7.45067 26.040925 0.060550 0.085483 0.146033 44.266930 8.145775 44.266930
1 764c75f661154dac3a6c254cd082ea7d foosdfpfkusacimwkcsosbicdxkicaua 544 0 0 2010-04-16 2016-04-16 2010-04-16 2015-04-17 47.96 ... 0.170512 44.385450 0.00000 44.385450 0.084587 0.089162 0.172468 44.444710 0.000000 44.444710
2 bba03439a292a1e166f80264c16191cb lmkebamcaaclubfxadlmueccxoimlema 1584 0 0 2010-03-30 2016-03-30 2010-03-30 2015-03-31 240.04 ... 0.151210 44.400265 0.00000 44.400265 0.153133 0.000000 0.153133 44.444710 0.000000 44.444710
3 1aa498825382410b098937d65c4ec26d usilxuppasemubllopkaafesmlibmsdf 8302 0 1998 2011-12-09 2016-12-09 2015-11-01 2015-12-10 796.94 ... 0.168953 44.266930 0.00000 44.266930 0.082931 0.088815 0.171746 44.266931 0.000000 44.266931
4 7ab4bf4878d8f7661dfc20e9b8e18011 foosdfpfkusacimwkcsosbicdxkicaua 45097 0 0 2011-12-02 2016-12-02 2011-12-02 2015-12-03 8069.28 ... 0.166061 44.266930 0.00000 44.266930 0.082931 0.085058 0.167989 44.266930 0.000000 44.266930

5 rows × 67 columns

Calculating maximum price changes across different time periods and months helps identify significant fluctuations in prices Sudden and substantial price changes can influence customers to consider switching to other utility providers, making it a valuable feature for predicting customer churn.

Tenure¶

How long a company has been a client of PowerCo.

In [38]:
df['tenure'] = ((df['date_end'] - df['date_activ'])/ np.timedelta64(1, 'Y')).astype(int)
In [39]:
df.groupby(['tenure']).agg({'churn': 'mean'}).sort_values(by='churn', ascending=False)
Out[39]:
churn
tenure
2 0.176471
3 0.146302
4 0.127268
5 0.096347
8 0.076923
6 0.073129
7 0.067628
9 0.000000
10 0.000000
11 0.000000

We can see that companies who have only been a client for 4 or less months are much more likely to churn compared to companies that have been a client for longer. Interestingly, the difference between 4 and 5 months is about 4%, which represents a large jump in likelihood for a customer to churn compared to the other differences between ordered tenure values. Perhaps this reveals that getting a customer to over 4 months tenure is actually a large milestone with respect to keeping them as a long term customer.

This is an interesting feature to keep for modelling because clearly how long you've been a client, has a influence on the chance of a client churning.

Transforming DATA¶

Transforming dates into months¶

  • months_activ = Number of months active until reference date (Jan 2016)
  • months_to_end = Number of months of the contract left until reference date (Jan 2016)
  • months_modif_prod = Number of months since last modification until reference date (Jan 2016)
  • months_renewal = Number of months since last renewal until reference date (Jan 2016)
In [40]:
def convert_months(reference_date, df, column):
    """
    Input a column with timedeltas and return months
    """
    time_delta = reference_date - df[column]
    months = (time_delta / np.timedelta64(1, 'M')).astype(int)
    return months
In [41]:
# Create reference date
reference_date = datetime(2016, 1, 1)

# Create columns
df['months_activ'] = convert_months(reference_date, df, 'date_activ')
df['months_to_end'] = -convert_months(reference_date, df, 'date_end')
df['months_modif_prod'] = convert_months(reference_date, df, 'date_modif_prod')
df['months_renewal'] = convert_months(reference_date, df, 'date_renewal')

Dates in their raw datetime format aren't directly suitable for predictive modeling. We need to transform these dates into more informative features with potential predictive power.

Consider the following scenarios:

  1. Months of Activity (months_activ): Longer-term clients may exhibit higher brand loyalty, suggesting they are less likely to churn. Conversely, newer clients might be more volatile. The months_activ feature captures the duration of a client's engagement with PowerCo.

  2. Months Until Contract Ends (months_to_end): Clients nearing the end of their contracts might explore new options, while those who recently joined could still be within a grace period to leave. Additionally, mid-contract termination might incur charges. months_to_end helps us understand churn timing patterns.

  3. Months Since Last Contract Update (months_modif_prod): Clients who recently modified their contracts may be more engaged or have received customer service attention. This could indicate a positive sign of customer engagement.

  4. Months Since Last Renewal (months_renewal): The duration since a client last renewed their contract reveals both engagement and commitment levels. Clients who renew their contracts may exhibit higher loyalty.

These engineered features provide insights into client behavior and engagement, helping us better understand and predict churn patterns.

In [42]:
# We no longer need the datetime columns that we used for feature engineering, so we can drop them
remove = [
    'date_activ',
    'date_end',
    'date_modif_prod',
    'date_renewal'
]

df = df.drop(columns=remove)
df.head()
Out[42]:
id channel_sales cons_12m cons_gas_12m cons_last_month forecast_cons_12m forecast_cons_year forecast_discount_energy forecast_meter_rent_12m forecast_price_energy_off_peak ... peak_mid_peak_var_max_monthly_diff off_peak_mid_peak_var_max_monthly_diff off_peak_peak_fix_max_monthly_diff peak_mid_peak_fix_max_monthly_diff off_peak_mid_peak_fix_max_monthly_diff tenure months_activ months_to_end months_modif_prod months_renewal
0 24011ae4ebbe3035111d65fa7c15bc57 foosdfpfkusacimwkcsosbicdxkicaua 0 54946 0 0.00 0 0.0 1.78 0.114481 ... 0.085483 0.146033 44.266930 8.145775 44.266930 3 30 5 1 6
1 764c75f661154dac3a6c254cd082ea7d foosdfpfkusacimwkcsosbicdxkicaua 544 0 0 47.96 0 0.0 38.72 0.165794 ... 0.089162 0.172468 44.444710 0.000000 44.444710 6 67 3 67 8
2 bba03439a292a1e166f80264c16191cb lmkebamcaaclubfxadlmueccxoimlema 1584 0 0 240.04 0 0.0 19.83 0.146694 ... 0.000000 0.153133 44.444710 0.000000 44.444710 6 67 2 67 8
3 1aa498825382410b098937d65c4ec26d usilxuppasemubllopkaafesmlibmsdf 8302 0 1998 796.94 1998 0.0 30.12 0.164775 ... 0.088815 0.171746 44.266931 0.000000 44.266931 5 47 11 1 0
4 7ab4bf4878d8f7661dfc20e9b8e18011 foosdfpfkusacimwkcsosbicdxkicaua 45097 0 0 8069.28 0 0.0 0.00 0.166178 ... 0.085058 0.167989 44.266930 0.000000 44.266930 5 48 10 48 0

5 rows × 68 columns

Transforming Boolean data¶

has_gas¶

We simply want to transform this column from being categorical to being a binary flag

In [43]:
df['has_gas'] = df['has_gas'].replace(['t', 'f'], [1, 0])
df.groupby(['has_gas']).agg({'churn': 'mean'})
Out[43]:
churn
has_gas
0 0.107881
1 0.088594

Having both electricity and gas services with PowerCo indicates that a customer uses multiple products and exhibits loyalty to the brand. Consequently, it's not unexpected to observe that customers who exclusively purchase electricity (without gas) are nearly 2% more likely to churn compared to those who also include gas in their services. This feature holds significance for churn prediction.

Transforming categorical data¶

In predictive modeling, categorical or string values cannot be directly processed. As a data scientist, my task is to transform these categorical features into numerical representations that are both concise and discriminative.

The most straightforward approach involves assigning each category a unique integer (label encoding). However, this may introduce an unintended ordinal relationship among categori s (e.g., 0 < 1 < 2 < 3), which is not always appropriate.

AlternativeI you can employ dummy variables, also known as one-hot encoding, to encode categorical features. This method creates a new binary feature for each distinct value in a categorical column. These binary features indicate whether a company belongs to a particular category (1) or not (0).

channel_sales¶

In [44]:
# Transform into categorical type
df['channel_sales'] = df['channel_sales'].astype('category')

# Let's see how many categories are within this column
df['channel_sales'].value_counts()
Out[44]:
channel_sales
foosdfpfkusacimwkcsosbicdxkicaua    6721
lmkebamcaaclubfxadlmueccxoimlema    1836
usilxuppasemubllopkaafesmlibmsdf    1369
ewpakwlliwisiwduibdlfmalxowmwpci     888
sddiedcslfslkckwlfkdpoeeailfpeds      11
epumfxlbckeskwekxbiuasklxalciiuu       3
fixdbufsefwooaasfcxdxadsiekoceaa       2
Name: count, dtype: int64

We have a total of 8 categories in this column, and we intend to create 8 dummy variables based on these categories. Nevertheless, examining the output, it's evident that the last 3 categories have very limited occurrences, specifically 11, 3, and 2 occurrences. Given the substantial size of our dataset, comprising approximately 10830 rows, these dummy variables would predominantly consist of 0 values. Consequently, they would contribute minimal predictive power to the model, as they essentially act as constants with very limited variation.

Hence, we have decided to exclude these 3 sparse dummy variables from our analysis.

In [45]:
df = pd.get_dummies(df, columns=['channel_sales'], prefix='channel')
df = df.drop(columns=['channel_sddiedcslfslkckwlfkdpoeeailfpeds', 'channel_epumfxlbckeskwekxbiuasklxalciiuu', 'channel_fixdbufsefwooaasfcxdxadsiekoceaa'])
df.head()
Out[45]:
id cons_12m cons_gas_12m cons_last_month forecast_cons_12m forecast_cons_year forecast_discount_energy forecast_meter_rent_12m forecast_price_energy_off_peak forecast_price_energy_peak ... off_peak_mid_peak_fix_max_monthly_diff tenure months_activ months_to_end months_modif_prod months_renewal channel_ewpakwlliwisiwduibdlfmalxowmwpci channel_foosdfpfkusacimwkcsosbicdxkicaua channel_lmkebamcaaclubfxadlmueccxoimlema channel_usilxuppasemubllopkaafesmlibmsdf
0 24011ae4ebbe3035111d65fa7c15bc57 0 54946 0 0.00 0 0.0 1.78 0.114481 0.098142 ... 44.266930 3 30 5 1 6 False True False False
1 764c75f661154dac3a6c254cd082ea7d 544 0 0 47.96 0 0.0 38.72 0.165794 0.087899 ... 44.444710 6 67 3 67 8 False True False False
2 bba03439a292a1e166f80264c16191cb 1584 0 0 240.04 0 0.0 19.83 0.146694 0.000000 ... 44.444710 6 67 2 67 8 False False True False
3 1aa498825382410b098937d65c4ec26d 8302 0 1998 796.94 1998 0.0 30.12 0.164775 0.086131 ... 44.266931 5 47 11 1 0 False False False True
4 7ab4bf4878d8f7661dfc20e9b8e18011 45097 0 0 8069.28 0 0.0 0.00 0.166178 0.087538 ... 44.266930 5 48 10 48 0 False True False False

5 rows × 71 columns

In [46]:
df['channel_foosdfpfkusacimwkcsosbicdxkicaua'] = df['channel_foosdfpfkusacimwkcsosbicdxkicaua'].replace(['True ', 'False'], [1, 0])
df['channel_ewpakwlliwisiwduibdlfmalxowmwpci'] = df['channel_ewpakwlliwisiwduibdlfmalxowmwpci'].replace(['True ', 'False'], [1, 0])
df['channel_lmkebamcaaclubfxadlmueccxoimlema'] = df['channel_lmkebamcaaclubfxadlmueccxoimlema'].replace(['True ', 'False'], [1, 0])
df['channel_usilxuppasemubllopkaafesmlibmsdf'] = df['channel_usilxuppasemubllopkaafesmlibmsdf'].replace(['True ', 'False'], [1, 0])
In [47]:
df.head()
Out[47]:
id cons_12m cons_gas_12m cons_last_month forecast_cons_12m forecast_cons_year forecast_discount_energy forecast_meter_rent_12m forecast_price_energy_off_peak forecast_price_energy_peak ... off_peak_mid_peak_fix_max_monthly_diff tenure months_activ months_to_end months_modif_prod months_renewal channel_ewpakwlliwisiwduibdlfmalxowmwpci channel_foosdfpfkusacimwkcsosbicdxkicaua channel_lmkebamcaaclubfxadlmueccxoimlema channel_usilxuppasemubllopkaafesmlibmsdf
0 24011ae4ebbe3035111d65fa7c15bc57 0 54946 0 0.00 0 0.0 1.78 0.114481 0.098142 ... 44.266930 3 30 5 1 6 False True False False
1 764c75f661154dac3a6c254cd082ea7d 544 0 0 47.96 0 0.0 38.72 0.165794 0.087899 ... 44.444710 6 67 3 67 8 False True False False
2 bba03439a292a1e166f80264c16191cb 1584 0 0 240.04 0 0.0 19.83 0.146694 0.000000 ... 44.444710 6 67 2 67 8 False False True False
3 1aa498825382410b098937d65c4ec26d 8302 0 1998 796.94 1998 0.0 30.12 0.164775 0.086131 ... 44.266931 5 47 11 1 0 False False False True
4 7ab4bf4878d8f7661dfc20e9b8e18011 45097 0 0 8069.28 0 0.0 0.00 0.166178 0.087538 ... 44.266930 5 48 10 48 0 False True False False

5 rows × 71 columns

In [48]:
df['channel_foosdfpfkusacimwkcsosbicdxkicaua'] = df['channel_foosdfpfkusacimwkcsosbicdxkicaua'].astype(int)
df['channel_ewpakwlliwisiwduibdlfmalxowmwpci'] = df['channel_ewpakwlliwisiwduibdlfmalxowmwpci'].astype(int)
df['channel_lmkebamcaaclubfxadlmueccxoimlema'] = df['channel_lmkebamcaaclubfxadlmueccxoimlema'].astype(int)
df['channel_usilxuppasemubllopkaafesmlibmsdf'] = df['channel_usilxuppasemubllopkaafesmlibmsdf'].astype(int)
In [49]:
df.head()
Out[49]:
id cons_12m cons_gas_12m cons_last_month forecast_cons_12m forecast_cons_year forecast_discount_energy forecast_meter_rent_12m forecast_price_energy_off_peak forecast_price_energy_peak ... off_peak_mid_peak_fix_max_monthly_diff tenure months_activ months_to_end months_modif_prod months_renewal channel_ewpakwlliwisiwduibdlfmalxowmwpci channel_foosdfpfkusacimwkcsosbicdxkicaua channel_lmkebamcaaclubfxadlmueccxoimlema channel_usilxuppasemubllopkaafesmlibmsdf
0 24011ae4ebbe3035111d65fa7c15bc57 0 54946 0 0.00 0 0.0 1.78 0.114481 0.098142 ... 44.266930 3 30 5 1 6 0 1 0 0
1 764c75f661154dac3a6c254cd082ea7d 544 0 0 47.96 0 0.0 38.72 0.165794 0.087899 ... 44.444710 6 67 3 67 8 0 1 0 0
2 bba03439a292a1e166f80264c16191cb 1584 0 0 240.04 0 0.0 19.83 0.146694 0.000000 ... 44.444710 6 67 2 67 8 0 0 1 0
3 1aa498825382410b098937d65c4ec26d 8302 0 1998 796.94 1998 0.0 30.12 0.164775 0.086131 ... 44.266931 5 47 11 1 0 0 0 0 1
4 7ab4bf4878d8f7661dfc20e9b8e18011 45097 0 0 8069.28 0 0.0 0.00 0.166178 0.087538 ... 44.266930 5 48 10 48 0 0 1 0 0

5 rows × 71 columns

origin_up¶

In [50]:
# Transform into categorical type
df['origin_up'] = df['origin_up'].astype('category')

# Let's see how many categories are within this column
df['origin_up'].value_counts()
Out[50]:
origin_up
lxidpiddsbxsbosboudacockeimpuepw    6440
kamkkxfxxuwbdslkwifmmcsiusiuosws    2748
ldkssxwpmemidmecebumciepifcamkci    1641
usapbepcfoloekilkwsdiboslwaxobdp       1
Name: count, dtype: int64
In [51]:
df = pd.get_dummies(df, columns=['origin_up'], prefix='origin_up')
df = df.drop(columns=[ 'origin_up_usapbepcfoloekilkwsdiboslwaxobdp'])
df.head()
Out[51]:
id cons_12m cons_gas_12m cons_last_month forecast_cons_12m forecast_cons_year forecast_discount_energy forecast_meter_rent_12m forecast_price_energy_off_peak forecast_price_energy_peak ... months_to_end months_modif_prod months_renewal channel_ewpakwlliwisiwduibdlfmalxowmwpci channel_foosdfpfkusacimwkcsosbicdxkicaua channel_lmkebamcaaclubfxadlmueccxoimlema channel_usilxuppasemubllopkaafesmlibmsdf origin_up_kamkkxfxxuwbdslkwifmmcsiusiuosws origin_up_ldkssxwpmemidmecebumciepifcamkci origin_up_lxidpiddsbxsbosboudacockeimpuepw
0 24011ae4ebbe3035111d65fa7c15bc57 0 54946 0 0.00 0 0.0 1.78 0.114481 0.098142 ... 5 1 6 0 1 0 0 False False True
1 764c75f661154dac3a6c254cd082ea7d 544 0 0 47.96 0 0.0 38.72 0.165794 0.087899 ... 3 67 8 0 1 0 0 True False False
2 bba03439a292a1e166f80264c16191cb 1584 0 0 240.04 0 0.0 19.83 0.146694 0.000000 ... 2 67 8 0 0 1 0 True False False
3 1aa498825382410b098937d65c4ec26d 8302 0 1998 796.94 1998 0.0 30.12 0.164775 0.086131 ... 11 1 0 0 0 0 1 False False True
4 7ab4bf4878d8f7661dfc20e9b8e18011 45097 0 0 8069.28 0 0.0 0.00 0.166178 0.087538 ... 10 48 0 0 1 0 0 False False True

5 rows × 73 columns

Dealing with Skewed Numeric Data¶

In my previous exploration, I identified that some of the numeric features exhibit significant skewness. It's essential to address skewness because certain predictive models operate under the assumption that input features are normally distributed and independent. These models, often referred to as 'parametric' models, may not perform optimally when presented with skewed data.

Skewed data can impact not only the model's accuracy but also its convergence speed, making it crucial to mitigate skewness. There are various techniques for handling skewed features, including transformations like:

  • Square root
  • Cubic root
  • Logarithm

For this particular analysis, I'll employ the logarithmic transformation to address positively skewed features.

It's worth noting that applying the logarithm directly to zero-valued entries isn't possible, so we'll introduce a small constant (1) to all the values.

To begin, I'll examine the statistical properties of the skewed features before and after applying the transformation.

Before¶

We can see that the standard deviation for most of these features is quite high.

In [52]:
# Apply log10 transformation
df["cons_12m"] = np.log10(df["cons_12m"] + 1)
df["cons_gas_12m"] = np.log10(df["cons_gas_12m"] + 1)
df["cons_last_month"] = np.log10(df["cons_last_month"] + 1)
df["forecast_cons_12m"] = np.log10(df["forecast_cons_12m"] + 1)
df["forecast_cons_year"] = np.log10(df["forecast_cons_year"] + 1)
df["forecast_meter_rent_12m"] = np.log10(df["forecast_meter_rent_12m"] + 1)
df["imp_cons"] = np.log10(df["imp_cons"] + 1)
After¶
In [54]:
skewed = [
    'cons_12m', 
    'cons_gas_12m', 
    'cons_last_month',
    'forecast_cons_12m', 
    'forecast_cons_year', 
    'forecast_discount_energy',
    'forecast_meter_rent_12m', 
    'forecast_price_energy_off_peak',
    'forecast_price_energy_peak', 
    'forecast_price_pow_off_peak'
]
In [55]:
df[skewed].describe()
Out[55]:
cons_12m cons_gas_12m cons_last_month forecast_cons_12m forecast_cons_year forecast_discount_energy forecast_meter_rent_12m forecast_price_energy_off_peak forecast_price_energy_peak forecast_price_pow_off_peak
count 10830.000000 10830.000000 10830.000000 10830.000000 10830.000000 10830.000000 10830.000000 10830.000000 10830.000000 10830.000000
mean 4.267846 0.746062 2.325621 3.002182 1.817738 1.072669 1.520898 0.138370 0.049930 43.217334
std 0.869224 1.690895 1.769891 0.613979 1.576417 5.350731 0.563951 0.022869 0.049371 2.941325
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 3.770410 0.000000 0.000000 2.716130 0.000000 0.000000 1.235276 0.116509 0.000000 40.606701
50% 4.175193 0.000000 2.928396 3.065378 2.534660 0.000000 1.298416 0.143166 0.083849 44.311378
75% 4.645525 0.000000 3.560355 3.394714 3.245266 0.000000 2.118695 0.146348 0.098861 44.311378
max 6.792889 6.618528 5.887169 4.918575 4.898330 30.000000 2.778376 0.273963 0.168092 59.266378

Now we can see that for the majority of the features, their standard deviation is much lower after transformation. This is a good thing, it shows that these features are more stable and predictable now.

Let's quickly check the distributions of some of these features too.

In [56]:
fig, axs = plt.subplots(nrows=3, figsize=(18, 20))

# Plot histograms
sns.histplot(df["cons_12m"].dropna(), ax=axs[0], kde=True)
sns.histplot(df[df["has_gas"] == 1]["cons_gas_12m"].dropna(), ax=axs[1], kde=True)
sns.histplot(df["cons_last_month"].dropna(), ax=axs[2], kde=True)

plt.show()
No description has been provided for this image

Correlation¶

Key Points:

  • Feature engineering is iterative and aims to enhance a predictive model.

  • Correlation analysis helps us understand feature relationships.

  • Features should correlate with the target variable (churn) for predictive value.

  • High feature correlation may imply redundancy, but perfect independence is rare.

  • Real datasets often have some feature correlation.

  • We'll examine feature correlations in our dataset.

In [57]:
df = df.apply(pd.to_numeric, errors='coerce') 
In [58]:
correlation = df.corr()
In [59]:
# Plot correlation
plt.figure(figsize=(45, 45))
sns.heatmap(
    correlation, 
    xticklabels=correlation.columns.values,
    yticklabels=correlation.columns.values, 
    annot=True, 
    annot_kws={'size': 12}
)
# Axis ticks size
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.show()
No description has been provided for this image
In [60]:
columns_to_drop = ['mean_year_price_p1', 'mean_year_price_p2', 'mean_year_price_p3', 'mean_6m_price_p1', 'mean_6m_price_p2', 'mean_6m_price_p3', 'mean_3m_price_p1', 'mean_3m_price_p2', 'mean_3m_price_p3']
df.drop(columns=columns_to_drop, inplace=True)
In [61]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10830 entries, 0 to 10829
Data columns (total 64 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   id                                          0 non-null      float64
 1   cons_12m                                    10830 non-null  float64
 2   cons_gas_12m                                10830 non-null  float64
 3   cons_last_month                             10830 non-null  float64
 4   forecast_cons_12m                           10830 non-null  float64
 5   forecast_cons_year                          10830 non-null  float64
 6   forecast_discount_energy                    10830 non-null  float64
 7   forecast_meter_rent_12m                     10830 non-null  float64
 8   forecast_price_energy_off_peak              10830 non-null  float64
 9   forecast_price_energy_peak                  10830 non-null  float64
 10  forecast_price_pow_off_peak                 10830 non-null  float64
 11  has_gas                                     10830 non-null  int64  
 12  imp_cons                                    10830 non-null  float64
 13  margin_gross_pow_ele                        10830 non-null  float64
 14  margin_net_pow_ele                          10830 non-null  float64
 15  nb_prod_act                                 10830 non-null  int64  
 16  net_margin                                  10830 non-null  float64
 17  num_years_antig                             10830 non-null  int64  
 18  pow_max                                     10830 non-null  float64
 19  mean_year_price_p1_var                      10830 non-null  float64
 20  mean_year_price_p2_var                      10830 non-null  float64
 21  mean_year_price_p3_var                      10830 non-null  float64
 22  mean_year_price_p1_fix                      10830 non-null  float64
 23  mean_year_price_p2_fix                      10830 non-null  float64
 24  mean_year_price_p3_fix                      10830 non-null  float64
 25  mean_6m_price_p1_var                        10830 non-null  float64
 26  mean_6m_price_p2_var                        10830 non-null  float64
 27  mean_6m_price_p3_var                        10830 non-null  float64
 28  mean_6m_price_p1_fix                        10830 non-null  float64
 29  mean_6m_price_p2_fix                        10830 non-null  float64
 30  mean_6m_price_p3_fix                        10830 non-null  float64
 31  mean_3m_price_p1_var                        10830 non-null  float64
 32  mean_3m_price_p2_var                        10830 non-null  float64
 33  mean_3m_price_p3_var                        10830 non-null  float64
 34  mean_3m_price_p1_fix                        10830 non-null  float64
 35  mean_3m_price_p2_fix                        10830 non-null  float64
 36  mean_3m_price_p3_fix                        10830 non-null  float64
 37  churn                                       10830 non-null  int64  
 38  offpeak_diff_dec_january_energy             10830 non-null  float64
 39  offpeak_diff_dec_january_power              10830 non-null  float64
 40  off_peak_peak_var_mean_diff                 10830 non-null  float64
 41  peak_mid_peak_var_mean_diff                 10830 non-null  float64
 42  off_peak_mid_peak_var_mean_diff             10830 non-null  float64
 43  off_peak_peak_fix_mean_diff                 10830 non-null  float64
 44  peak_mid_peak_fix_mean_diff                 10830 non-null  float64
 45  off_peak_mid_peak_fix_mean_diff             10830 non-null  float64
 46  off_peak_peak_var_max_monthly_diff          10830 non-null  float64
 47  peak_mid_peak_var_max_monthly_diff          10830 non-null  float64
 48  off_peak_mid_peak_var_max_monthly_diff      10830 non-null  float64
 49  off_peak_peak_fix_max_monthly_diff          10830 non-null  float64
 50  peak_mid_peak_fix_max_monthly_diff          10830 non-null  float64
 51  off_peak_mid_peak_fix_max_monthly_diff      10830 non-null  float64
 52  tenure                                      10830 non-null  int32  
 53  months_activ                                10830 non-null  int32  
 54  months_to_end                               10830 non-null  int32  
 55  months_modif_prod                           10830 non-null  int32  
 56  months_renewal                              10830 non-null  int32  
 57  channel_ewpakwlliwisiwduibdlfmalxowmwpci    10830 non-null  int32  
 58  channel_foosdfpfkusacimwkcsosbicdxkicaua    10830 non-null  int32  
 59  channel_lmkebamcaaclubfxadlmueccxoimlema    10830 non-null  int32  
 60  channel_usilxuppasemubllopkaafesmlibmsdf    10830 non-null  int32  
 61  origin_up_kamkkxfxxuwbdslkwifmmcsiusiuosws  10830 non-null  bool   
 62  origin_up_ldkssxwpmemidmecebumciepifcamkci  10830 non-null  bool   
 63  origin_up_lxidpiddsbxsbosboudacockeimpuepw  10830 non-null  bool   
dtypes: bool(3), float64(48), int32(9), int64(4)
memory usage: 4.7 MB
In [62]:
correlation = df.corr()
In [63]:
# Plot correlation
plt.figure(figsize=(45, 45))
sns.heatmap(
    correlation, 
    xticklabels=correlation.columns.values,
    yticklabels=correlation.columns.values, 
    annot=True, 
    annot_kws={'size': 12}
)
# Axis ticks size
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.show()
No description has been provided for this image
In [64]:
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Now, display the correlation matrix
correlation_matrix = df.corr()
display(correlation_matrix)
id cons_12m cons_gas_12m cons_last_month forecast_cons_12m forecast_cons_year forecast_discount_energy forecast_meter_rent_12m forecast_price_energy_off_peak forecast_price_energy_peak forecast_price_pow_off_peak has_gas imp_cons margin_gross_pow_ele margin_net_pow_ele nb_prod_act net_margin num_years_antig pow_max mean_year_price_p1_var mean_year_price_p2_var mean_year_price_p3_var mean_year_price_p1_fix mean_year_price_p2_fix mean_year_price_p3_fix mean_6m_price_p1_var mean_6m_price_p2_var mean_6m_price_p3_var mean_6m_price_p1_fix mean_6m_price_p2_fix mean_6m_price_p3_fix mean_3m_price_p1_var mean_3m_price_p2_var mean_3m_price_p3_var mean_3m_price_p1_fix mean_3m_price_p2_fix mean_3m_price_p3_fix churn offpeak_diff_dec_january_energy offpeak_diff_dec_january_power off_peak_peak_var_mean_diff peak_mid_peak_var_mean_diff off_peak_mid_peak_var_mean_diff off_peak_peak_fix_mean_diff peak_mid_peak_fix_mean_diff off_peak_mid_peak_fix_mean_diff off_peak_peak_var_max_monthly_diff peak_mid_peak_var_max_monthly_diff off_peak_mid_peak_var_max_monthly_diff off_peak_peak_fix_max_monthly_diff peak_mid_peak_fix_max_monthly_diff off_peak_mid_peak_fix_max_monthly_diff tenure months_activ months_to_end months_modif_prod months_renewal channel_ewpakwlliwisiwduibdlfmalxowmwpci channel_foosdfpfkusacimwkcsosbicdxkicaua channel_lmkebamcaaclubfxadlmueccxoimlema channel_usilxuppasemubllopkaafesmlibmsdf origin_up_kamkkxfxxuwbdslkwifmmcsiusiuosws origin_up_ldkssxwpmemidmecebumciepifcamkci origin_up_lxidpiddsbxsbosboudacockeimpuepw
id NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
cons_12m NaN 1.000000 0.208056 0.637597 0.560381 0.305645 -0.040412 0.218569 -0.194970 0.299266 -0.198465 0.157625 0.316897 -0.082099 -0.082055 0.096010 0.319389 0.039649 0.229064 -0.197497 0.297674 0.235236 -0.169152 0.244749 0.239935 -0.194166 0.298190 0.236403 -0.172721 0.244943 0.240059 -0.192240 0.298859 0.240398 -0.177023 0.248440 0.243398 -0.020351 -0.069082 -0.092476 -0.308118 0.229332 -0.237974 -0.246618 0.240317 -0.237865 -0.306261 0.193561 -0.225558 -0.233610 0.241873 -0.227262 -0.001782 0.024443 -0.069864 0.131206 0.053129 -0.091575 -0.094734 0.341366 -0.170963 0.047390 0.005554 -0.046032
cons_gas_12m NaN 0.208056 1.000000 0.167099 0.114869 0.089734 0.004918 0.070899 -0.068117 0.089695 -0.073617 0.936642 0.095477 -0.017257 -0.017279 0.680402 0.109158 -0.031562 0.091933 -0.065972 0.089745 0.088272 -0.062874 0.088468 0.089938 -0.064012 0.089955 0.087600 -0.063679 0.087292 0.088810 -0.065010 0.090170 0.089481 -0.066527 0.088850 0.090229 -0.023863 -0.010695 -0.012580 -0.094985 0.047079 -0.086185 -0.089491 0.081020 -0.089014 -0.093136 0.036804 -0.080449 -0.083611 0.082329 -0.083103 -0.037000 -0.032873 0.003955 0.007145 -0.001304 -0.004010 -0.042855 0.067439 -0.009229 -0.008125 0.002300 0.005605
cons_last_month NaN 0.637597 0.167099 1.000000 0.301223 0.773384 -0.026131 0.319147 -0.269778 0.396347 -0.260961 0.133533 0.765409 0.021162 0.021088 0.089516 0.225797 0.028860 0.254412 -0.266886 0.398540 0.373980 -0.239132 0.380196 0.376836 -0.261935 0.398624 0.375721 -0.235379 0.381497 0.378079 -0.259280 0.398054 0.379186 -0.231977 0.384732 0.381143 -0.020197 -0.073933 -0.045920 -0.413326 0.231979 -0.360309 -0.378373 0.365809 -0.366207 -0.409099 0.196057 -0.341705 -0.360426 0.364341 -0.350454 -0.010212 0.014248 -0.072788 0.099014 0.052954 -0.087119 -0.029073 0.230049 -0.144890 -0.020708 -0.028918 0.039364
forecast_cons_12m NaN 0.560381 0.114869 0.301223 1.000000 0.375270 0.053746 0.196816 -0.136624 0.232526 -0.106523 0.106732 0.417996 -0.142003 -0.142045 0.065421 0.450188 -0.045731 0.231111 -0.139142 0.229590 0.208668 -0.066182 0.215661 0.211049 -0.134623 0.230954 0.210766 -0.072108 0.216657 0.211971 -0.134164 0.232138 0.213476 -0.084794 0.218326 0.213306 0.007649 -0.041188 -0.106918 -0.233357 0.142249 -0.197301 -0.200732 0.212432 -0.186259 -0.233242 0.120706 -0.191796 -0.199130 0.212421 -0.187308 -0.064903 -0.051330 -0.041631 -0.068461 0.048385 -0.027902 0.002085 0.079778 -0.070993 -0.059900 0.008405 0.046895
forecast_cons_year NaN 0.305645 0.089734 0.773384 0.375270 1.000000 0.007863 0.393611 -0.336673 0.427937 -0.299552 0.078982 0.990477 0.036246 0.036193 0.047516 0.242752 -0.022781 0.298480 -0.337094 0.432362 0.463915 -0.286077 0.469890 0.466077 -0.328677 0.431570 0.465943 -0.278042 0.471733 0.467879 -0.325451 0.430338 0.468336 -0.272401 0.473837 0.469765 0.004749 -0.085000 -0.050024 -0.463874 0.177668 -0.449263 -0.465742 0.451490 -0.450238 -0.459979 0.145321 -0.430314 -0.449388 0.447746 -0.436261 -0.043288 -0.028887 -0.046736 -0.019037 0.040997 -0.046262 0.053469 0.037262 -0.082562 -0.099061 -0.029786 0.109370
forecast_discount_energy NaN -0.040412 0.004918 -0.026131 0.053746 0.007863 1.000000 -0.008220 0.386674 0.056343 0.073329 0.010378 0.021783 0.248304 0.247414 0.107554 0.085858 -0.075326 0.000857 0.312703 0.035606 0.023426 0.059829 -0.005125 -0.003300 0.368988 0.048281 0.031046 0.070777 -0.004460 -0.002355 0.385339 0.054565 0.033978 0.071941 -0.004390 -0.002254 0.016569 0.427082 0.074153 0.072547 0.033423 0.103495 0.016424 -0.008172 0.019261 0.094195 0.041106 0.138286 0.021593 -0.005071 0.025368 -0.125695 -0.077109 -0.067661 -0.191101 0.191076 -0.015067 0.041276 -0.068687 0.030615 -0.074705 -0.014981 0.077191
forecast_meter_rent_12m NaN 0.218569 0.070899 0.319147 0.196816 0.393611 -0.008220 1.000000 -0.538656 0.652618 -0.433310 0.060150 0.360668 0.144883 0.144908 0.024600 0.197255 -0.021771 0.421056 -0.538755 0.666853 0.771629 -0.436593 0.775378 0.771853 -0.529645 0.659873 0.770004 -0.429629 0.773970 0.770603 -0.525860 0.654712 0.767223 -0.419649 0.771322 0.767954 0.038398 -0.138425 -0.069200 -0.721582 0.202681 -0.738864 -0.761439 0.739977 -0.735287 -0.712991 0.165645 -0.706956 -0.733256 0.736031 -0.708900 -0.021607 -0.018337 -0.034449 -0.022078 0.034975 -0.007484 0.043715 0.018631 -0.077296 -0.050595 -0.000226 0.044973
forecast_price_energy_off_peak NaN -0.194970 -0.068117 -0.269778 -0.136624 -0.336673 0.386674 -0.538656 1.000000 -0.398700 0.695291 -0.054137 -0.293381 0.132602 0.132217 0.022538 -0.130940 -0.014720 -0.408740 0.965518 -0.426233 -0.708144 0.669616 -0.727281 -0.719571 0.981908 -0.411815 -0.706868 0.678249 -0.728880 -0.721197 0.983559 -0.404341 -0.706584 0.662649 -0.729676 -0.722141 -0.027420 0.520849 0.254693 0.663271 0.143750 0.858354 0.766239 -0.702097 0.758497 0.671506 0.126405 0.852115 0.739865 -0.695747 0.736335 -0.022696 -0.018111 0.021421 -0.006928 0.045929 0.008797 -0.071438 -0.006367 0.104883 0.082512 -0.000745 -0.072813
forecast_price_energy_peak NaN 0.299266 0.089695 0.396347 0.232526 0.427937 0.056343 0.652618 -0.398700 1.000000 -0.498089 0.082285 0.401087 0.177142 0.177239 0.047625 0.227122 0.010587 0.432647 -0.395685 0.993733 0.816469 -0.500530 0.808654 0.810224 -0.387566 0.996139 0.817939 -0.492440 0.809466 0.811253 -0.383647 0.995653 0.817534 -0.482524 0.808941 0.810771 0.030926 -0.085036 -0.070223 -0.942846 0.725950 -0.715069 -0.803159 0.762179 -0.783583 -0.932682 0.609845 -0.669854 -0.763882 0.759631 -0.746201 -0.006468 0.010784 -0.048325 0.036395 0.056784 -0.016463 -0.015435 0.114532 -0.092951 0.002831 0.018001 -0.015803
forecast_price_pow_off_peak NaN -0.198465 -0.073617 -0.260961 -0.106523 -0.299552 0.073329 -0.433310 0.695291 -0.498089 1.000000 -0.065769 -0.256476 0.002166 0.002066 -0.029349 -0.096761 -0.034052 -0.288604 0.684190 -0.512662 -0.614747 0.955038 -0.581411 -0.646471 0.675319 -0.507123 -0.615263 0.948081 -0.581727 -0.647276 0.670356 -0.503212 -0.615687 0.923371 -0.582594 -0.647570 0.005259 0.257962 0.216953 0.642567 -0.128435 0.686218 0.696519 -0.431536 0.779720 0.644297 -0.096320 0.674773 0.679188 -0.426496 0.757334 0.005188 -0.023514 0.098508 -0.027601 -0.073259 0.001886 -0.019017 -0.055722 0.089576 0.066178 -0.026341 -0.039487
has_gas NaN 0.157625 0.936642 0.133533 0.106732 0.078982 0.010378 0.060150 -0.054137 0.082285 -0.065769 1.000000 0.083756 -0.019030 -0.019031 0.730074 0.096486 -0.032483 0.075944 -0.052003 0.081917 0.077479 -0.055146 0.077324 0.079376 -0.050264 0.082523 0.077030 -0.056025 0.076355 0.078454 -0.050917 0.082687 0.078124 -0.059076 0.077070 0.079034 -0.023903 -0.002954 -0.009573 -0.084028 0.046906 -0.073388 -0.078256 0.069417 -0.078465 -0.082687 0.036487 -0.069577 -0.075130 0.069884 -0.075254 -0.037820 -0.034324 0.002174 -0.001392 0.001964 0.003054 -0.041562 0.052856 -0.000205 -0.020315 0.009766 0.010960
imp_cons NaN 0.316897 0.095477 0.765409 0.417996 0.990477 0.021783 0.360668 -0.293381 0.401087 -0.256476 0.083756 1.000000 0.025559 0.025487 0.052173 0.268100 -0.027276 0.293575 -0.294000 0.404205 0.424982 -0.239973 0.430823 0.426962 -0.285744 0.404128 0.427296 -0.233200 0.432637 0.428729 -0.282665 0.403374 0.429856 -0.229560 0.434744 0.430589 0.002832 -0.069673 -0.048173 -0.426788 0.177187 -0.405894 -0.422556 0.414621 -0.406308 -0.422948 0.146047 -0.388085 -0.407740 0.411407 -0.393848 -0.048613 -0.033826 -0.046561 -0.023823 0.043987 -0.045627 0.048269 0.040859 -0.079644 -0.096117 -0.028076 0.105503
margin_gross_pow_ele NaN -0.082099 -0.017257 0.021162 -0.142003 0.036246 0.248304 0.144883 0.132602 0.177142 0.002166 -0.019030 0.025559 1.000000 0.999893 -0.005470 0.033555 -0.050916 0.421764 0.118883 0.172445 0.221761 -0.024227 0.196256 0.211744 0.138706 0.176655 0.226426 -0.008049 0.196963 0.212734 0.134892 0.177117 0.225570 -0.009040 0.196483 0.212245 0.104747 0.223785 0.178105 -0.102608 0.024157 -0.107597 -0.175469 0.157458 -0.175147 -0.091141 0.029536 -0.078987 -0.155153 0.157086 -0.151846 -0.132935 -0.080712 -0.234475 0.003023 0.243378 0.024831 0.017309 -0.040670 0.000585 -0.044511 -0.029678 0.061287
margin_net_pow_ele NaN -0.082055 -0.017279 0.021088 -0.142045 0.036193 0.247414 0.144908 0.132217 0.177239 0.002066 -0.019031 0.025487 0.999893 1.000000 -0.005446 0.033350 -0.051177 0.421833 0.118572 0.172564 0.221846 -0.024326 0.196338 0.211825 0.138354 0.176772 0.226509 -0.008155 0.197044 0.212815 0.134526 0.177226 0.225644 -0.009146 0.196563 0.212325 0.104809 0.223065 0.177966 -0.102807 0.024262 -0.107775 -0.175559 0.157535 -0.175239 -0.091353 0.029648 -0.079181 -0.155237 0.157164 -0.151932 -0.133159 -0.080952 -0.234395 0.003181 0.243163 0.024883 0.017463 -0.040964 0.000648 -0.044416 -0.030075 0.061492
nb_prod_act NaN 0.096010 0.680402 0.089516 0.065421 0.047516 0.107554 0.024600 0.022538 0.047625 -0.029349 0.730074 0.052173 -0.005470 -0.005446 1.000000 0.060482 -0.033378 0.026135 0.009723 0.043750 0.032486 -0.022482 0.029427 0.031841 0.022747 0.047054 0.033929 -0.020339 0.029264 0.031751 0.025031 0.047882 0.034861 -0.022843 0.029533 0.031877 -0.012798 0.071861 0.005834 -0.032681 0.036360 -0.018706 -0.030081 0.023443 -0.031576 -0.028123 0.027692 -0.010856 -0.028812 0.023152 -0.029954 -0.044088 -0.036940 0.000461 -0.030133 0.033557 -0.005054 -0.021126 0.018388 0.015661 -0.017748 0.001124 0.014986
net_margin NaN 0.319389 0.109158 0.225797 0.450188 0.242752 0.085858 0.197255 -0.130940 0.227122 -0.096761 0.096486 0.268100 0.033555 0.033350 0.060482 1.000000 -0.050923 0.341462 -0.136052 0.224330 0.234617 -0.083027 0.240585 0.223170 -0.130266 0.224544 0.235421 -0.084417 0.241398 0.223971 -0.128870 0.225681 0.238585 -0.083595 0.244695 0.224993 0.047705 -0.005951 -0.044490 -0.228043 0.099919 -0.214032 -0.225770 0.259333 -0.200584 -0.221083 0.091578 -0.192561 -0.208609 0.266221 -0.192748 -0.062418 -0.052375 -0.038522 -0.020199 0.035024 -0.027863 0.010997 0.057804 -0.058315 -0.057064 -0.000987 0.051302
num_years_antig NaN 0.039649 -0.031562 0.028860 -0.045731 -0.022781 -0.075326 -0.021771 -0.014720 0.010587 -0.034052 -0.032483 -0.027276 -0.050916 -0.051177 -0.033378 -0.050923 1.000000 -0.033258 -0.018805 0.008216 -0.054951 -0.033458 -0.048904 -0.048826 -0.017746 0.009940 -0.053830 -0.034620 -0.048567 -0.048487 -0.015633 0.009647 -0.053631 -0.029653 -0.048468 -0.048303 -0.089442 -0.027536 -0.025019 -0.012848 0.084455 0.030739 0.035823 -0.046407 0.029531 -0.021747 0.058297 0.015726 0.025588 -0.048888 0.019178 0.903659 0.970263 -0.060325 0.444468 0.038701 -0.055191 -0.077413 0.272951 -0.143942 0.389631 -0.036552 -0.318406
pow_max NaN 0.229064 0.091933 0.254412 0.231111 0.298480 0.000857 0.421056 -0.408740 0.432647 -0.288604 0.075944 0.293575 0.421764 0.421833 0.026135 0.341462 -0.033258 1.000000 -0.414287 0.439183 0.531118 -0.283945 0.545518 0.529002 -0.403496 0.436022 0.532911 -0.280031 0.546494 0.530001 -0.400545 0.433462 0.532889 -0.274791 0.546339 0.529478 0.045617 -0.092034 -0.049445 -0.494572 0.104328 -0.525197 -0.531066 0.546181 -0.499692 -0.488461 0.092617 -0.497001 -0.502044 0.543556 -0.473925 -0.044448 -0.034423 -0.051153 -0.009237 0.037946 -0.017853 0.037618 0.018000 -0.059891 -0.077230 -0.027328 0.088470
mean_year_price_p1_var NaN -0.197497 -0.065972 -0.266886 -0.139142 -0.337094 0.312703 -0.538755 0.965518 -0.395685 0.684190 -0.052003 -0.294000 0.118883 0.118572 0.009723 -0.136052 -0.018805 -0.414287 1.000000 -0.418362 -0.713466 0.695842 -0.734121 -0.724174 0.986813 -0.404906 -0.710860 0.698998 -0.732775 -0.722863 0.967864 -0.398061 -0.708277 0.677565 -0.730093 -0.720332 -0.024367 0.373535 0.213962 0.668039 0.164490 0.875224 0.777432 -0.712644 0.769451 0.670525 0.138393 0.860997 0.748774 -0.707167 0.744422 -0.017387 -0.018424 0.021410 -0.006834 0.020629 0.006681 -0.073199 -0.003652 0.105954 0.089631 -0.000567 -0.079271
mean_year_price_p2_var NaN 0.297674 0.089745 0.398540 0.229590 0.432362 0.035606 0.666853 -0.426233 0.993733 -0.512662 0.081917 0.404205 0.172445 0.172564 0.043750 0.224330 0.008216 0.439183 -0.418362 1.000000 0.834690 -0.513756 0.827324 0.829144 -0.411928 0.997964 0.833047 -0.503982 0.825767 0.827748 -0.408969 0.993684 0.829788 -0.492503 0.822816 0.824784 0.031242 -0.100589 -0.060631 -0.955357 0.713907 -0.736326 -0.822036 0.779385 -0.802309 -0.943171 0.608886 -0.691198 -0.781965 0.775702 -0.764235 -0.007428 0.009144 -0.049872 0.034023 0.053041 -0.018207 -0.012485 0.113643 -0.094421 0.001708 0.017359 -0.014324
mean_year_price_p3_var NaN 0.235236 0.088272 0.373980 0.208668 0.463915 0.023426 0.771629 -0.708144 0.816469 -0.614747 0.077479 0.424982 0.221761 0.221846 0.032486 0.234617 -0.054951 0.531118 -0.713466 0.834690 1.000000 -0.622942 0.992426 0.993648 -0.698375 0.826097 0.998089 -0.612698 0.990855 0.992269 -0.691986 0.819786 0.994316 -0.597326 0.987424 0.988819 0.051389 -0.166182 -0.079505 -0.915918 0.210255 -0.963379 -0.987415 0.936672 -0.963506 -0.903773 0.175448 -0.918704 -0.945795 0.931142 -0.924010 -0.059740 -0.051500 -0.049076 -0.038033 0.042113 -0.016419 0.069043 0.007014 -0.094566 -0.093915 -0.007013 0.088510
mean_year_price_p1_fix NaN -0.169152 -0.062874 -0.239132 -0.066182 -0.286077 0.059829 -0.436593 0.669616 -0.500530 0.955038 -0.055146 -0.239973 -0.024227 -0.024326 -0.022482 -0.083027 -0.033458 -0.283945 0.695842 -0.513756 -0.622942 1.000000 -0.586937 -0.655916 0.682716 -0.505782 -0.619948 0.993242 -0.583849 -0.653379 0.672929 -0.500014 -0.617036 0.961508 -0.581263 -0.650396 0.003618 0.223553 0.179565 0.647253 -0.119957 0.696334 0.710317 -0.429624 0.799733 0.646273 -0.089642 0.681811 0.689289 -0.425158 0.772011 -0.017800 -0.032158 0.036194 -0.009258 -0.021242 0.009300 -0.021279 -0.048636 0.078199 0.053349 -0.015422 -0.036116
mean_year_price_p2_fix NaN 0.244749 0.088468 0.380196 0.215661 0.469890 -0.005125 0.775378 -0.727281 0.808654 -0.581411 0.077324 0.430823 0.196256 0.196338 0.029427 0.240585 -0.048904 0.545518 -0.734121 0.827324 0.992426 -0.586937 1.000000 0.989590 -0.719114 0.818188 0.990357 -0.577816 0.998477 0.988175 -0.711966 0.811906 0.986709 -0.564195 0.994948 0.984719 0.054609 -0.174776 -0.081622 -0.916602 0.206809 -0.966055 -0.986797 0.965025 -0.950267 -0.904261 0.173096 -0.920534 -0.944682 0.959447 -0.911615 -0.054174 -0.045681 -0.049518 -0.030298 0.039878 -0.014407 0.065604 0.012086 -0.097026 -0.090857 -0.006741 0.085600
mean_year_price_p3_fix NaN 0.239935 0.089938 0.376836 0.211049 0.466077 -0.003300 0.771853 -0.719571 0.810224 -0.646471 0.079376 0.426962 0.211744 0.211825 0.031841 0.223170 -0.048826 0.529002 -0.724174 0.829144 0.993648 -0.655916 0.989590 1.000000 -0.710033 0.819993 0.991506 -0.644967 0.987840 0.998480 -0.703724 0.813610 0.987938 -0.628365 0.984489 0.995050 0.048268 -0.176561 -0.080056 -0.914857 0.208486 -0.963091 -0.991546 0.917252 -0.977727 -0.903286 0.172571 -0.919749 -0.950917 0.911224 -0.938048 -0.052940 -0.045470 -0.047293 -0.033887 0.037691 -0.014807 0.064938 0.011226 -0.094793 -0.090144 -0.005798 0.084280
mean_6m_price_p1_var NaN -0.194166 -0.064012 -0.261935 -0.134623 -0.328677 0.368988 -0.529645 0.981908 -0.387566 0.675319 -0.050264 -0.285744 0.138706 0.138354 0.022747 -0.130266 -0.017746 -0.403496 0.986813 -0.411928 -0.698375 0.682716 -0.719114 -0.710033 1.000000 -0.396771 -0.695899 0.695428 -0.719248 -0.710173 0.988765 -0.389214 -0.693464 0.677459 -0.717170 -0.708239 -0.025368 0.492320 0.260501 0.658479 0.156722 0.859759 0.761760 -0.696871 0.754554 0.663735 0.135619 0.850255 0.734743 -0.690381 0.731223 -0.026183 -0.021215 0.010765 -0.008263 0.044688 0.008749 -0.071737 -0.005009 0.103673 0.081036 0.000391 -0.072338
mean_6m_price_p2_var NaN 0.298190 0.089955 0.398624 0.230954 0.431570 0.048281 0.659873 -0.411815 0.996139 -0.507123 0.082523 0.404128 0.176655 0.176772 0.047054 0.224544 0.009940 0.436022 -0.404906 0.997964 0.826097 -0.505782 0.818188 0.819993 -0.396771 1.000000 0.827111 -0.495722 0.818695 0.820692 -0.393183 0.997043 0.824806 -0.484044 0.816497 0.818476 0.030647 -0.088697 -0.060626 -0.949312 0.721219 -0.725244 -0.812498 0.770769 -0.792813 -0.937963 0.613049 -0.680389 -0.773348 0.766106 -0.755714 -0.006922 0.010111 -0.049639 0.035279 0.055073 -0.017429 -0.015015 0.114679 -0.092655 0.002768 0.016425 -0.014587
mean_6m_price_p3_var NaN 0.236403 0.087600 0.375721 0.210766 0.465943 0.031046 0.770004 -0.706868 0.817939 -0.615263 0.077030 0.427296 0.226426 0.226509 0.033929 0.235421 -0.053830 0.532911 -0.710860 0.833047 0.998089 -0.619948 0.990357 0.991506 -0.695899 0.827111 1.000000 -0.610252 0.991865 0.993217 -0.689555 0.821740 0.997336 -0.594577 0.989385 0.990686 0.050784 -0.167338 -0.085197 -0.913724 0.209767 -0.961062 -0.985017 0.934847 -0.960970 -0.902119 0.176225 -0.916096 -0.942883 0.927988 -0.921219 -0.060002 -0.051184 -0.051563 -0.035416 0.043921 -0.015600 0.068330 0.006931 -0.094258 -0.094464 -0.007413 0.089288
mean_6m_price_p1_fix NaN -0.172721 -0.063679 -0.235379 -0.072108 -0.278042 0.070777 -0.429629 0.678249 -0.492440 0.948081 -0.056025 -0.233200 -0.008049 -0.008155 -0.020339 -0.084417 -0.034620 -0.280031 0.698998 -0.503982 -0.612698 0.993242 -0.577816 -0.644967 0.695428 -0.495722 -0.610252 1.000000 -0.575228 -0.643098 0.688875 -0.489638 -0.607041 0.975133 -0.572146 -0.639863 0.002885 0.269564 0.247727 0.640273 -0.115632 0.690470 0.701036 -0.424326 0.789146 0.640796 -0.087424 0.677123 0.679114 -0.420096 0.760321 -0.020838 -0.033933 0.034926 -0.009934 -0.015959 0.008518 -0.019502 -0.050259 0.078152 0.049808 -0.017303 -0.031597
mean_6m_price_p2_fix NaN 0.244943 0.087292 0.381497 0.216657 0.471733 -0.004460 0.773970 -0.728880 0.809466 -0.581727 0.076355 0.432637 0.196963 0.197044 0.029264 0.241398 -0.048567 0.546494 -0.732775 0.825767 0.990855 -0.583849 0.998477 0.987840 -0.719248 0.818695 0.991865 -0.575228 1.000000 0.989445 -0.712613 0.813147 0.989201 -0.561188 0.997450 0.986911 0.054167 -0.182675 -0.086833 -0.914888 0.206041 -0.964455 -0.984855 0.963999 -0.948017 -0.903138 0.173093 -0.918814 -0.942160 0.957014 -0.909139 -0.053371 -0.045367 -0.048921 -0.028539 0.037257 -0.013989 0.065331 0.011336 -0.096271 -0.091132 -0.007606 0.086476
mean_6m_price_p3_fix NaN 0.240059 0.088810 0.378079 0.211971 0.467879 -0.002355 0.770603 -0.721197 0.811253 -0.647276 0.078454 0.428729 0.212734 0.212815 0.031751 0.223971 -0.048487 0.530001 -0.722863 0.827748 0.992269 -0.653379 0.988175 0.998480 -0.710173 0.820692 0.993217 -0.643098 0.989445 1.000000 -0.704281 0.815067 0.990605 -0.626030 0.987017 0.997482 0.047981 -0.184278 -0.085814 -0.913287 0.207761 -0.961638 -0.989808 0.916104 -0.975812 -0.902369 0.172476 -0.918321 -0.948696 0.908661 -0.935864 -0.052090 -0.045139 -0.046541 -0.032316 0.034976 -0.014415 0.064637 0.010495 -0.093998 -0.090457 -0.006603 0.085145
mean_3m_price_p1_var NaN -0.192240 -0.065010 -0.259280 -0.134164 -0.325451 0.385339 -0.525860 0.983559 -0.383647 0.670356 -0.050917 -0.282665 0.134892 0.134526 0.025031 -0.128870 -0.015633 -0.400545 0.967864 -0.408969 -0.691986 0.672929 -0.711966 -0.703724 0.988765 -0.393183 -0.689555 0.688875 -0.712613 -0.704281 1.000000 -0.382590 -0.687172 0.692036 -0.711693 -0.703306 -0.026152 0.551663 0.309495 0.649890 0.153853 0.848098 0.753587 -0.688577 0.746815 0.657514 0.134265 0.842641 0.727449 -0.682581 0.724143 -0.026518 -0.020066 0.011259 -0.004742 0.049878 0.009390 -0.072142 -0.003627 0.102321 0.078202 0.002147 -0.071111
mean_3m_price_p2_var NaN 0.298859 0.090170 0.398054 0.232138 0.430338 0.054565 0.654712 -0.404341 0.995653 -0.503212 0.082687 0.403374 0.177117 0.177226 0.047882 0.225681 0.009647 0.433462 -0.398061 0.993684 0.819786 -0.500014 0.811906 0.813610 -0.389214 0.997043 0.821740 -0.489638 0.813147 0.815067 -0.382590 1.000000 0.822276 -0.473290 0.813394 0.815353 0.030509 -0.080187 -0.052229 -0.943579 0.721646 -0.718268 -0.805883 0.765010 -0.786132 -0.932933 0.615104 -0.673948 -0.767966 0.761157 -0.750336 -0.007278 0.010057 -0.049086 0.035744 0.057283 -0.017330 -0.015558 0.113502 -0.090881 0.001988 0.017201 -0.014469
mean_3m_price_p3_var NaN 0.240398 0.089481 0.379186 0.213476 0.468336 0.033978 0.767223 -0.706584 0.817534 -0.615687 0.078124 0.429856 0.225570 0.225644 0.034861 0.238585 -0.053631 0.532889 -0.708277 0.829788 0.994316 -0.617036 0.986709 0.987938 -0.693464 0.824806 0.997336 -0.607041 0.989201 0.990605 -0.687172 0.822276 1.000000 -0.588454 0.991672 0.992993 0.049234 -0.168830 -0.080010 -0.910214 0.208779 -0.957469 -0.981264 0.931251 -0.957322 -0.899688 0.175511 -0.913754 -0.940078 0.925040 -0.918524 -0.059581 -0.050784 -0.051487 -0.034919 0.043160 -0.014971 0.068227 0.006624 -0.094548 -0.094843 -0.006298 0.088809
mean_3m_price_p1_fix NaN -0.177023 -0.066527 -0.231977 -0.084794 -0.272401 0.071941 -0.419649 0.662649 -0.482524 0.923371 -0.059076 -0.229560 -0.009040 -0.009146 -0.022843 -0.083595 -0.029653 -0.274791 0.677565 -0.492503 -0.597326 0.961508 -0.564195 -0.628365 0.677459 -0.484044 -0.594577 0.975133 -0.561188 -0.626030 0.692036 -0.473290 -0.588454 1.000000 -0.554858 -0.619510 0.000919 0.305390 0.367258 0.623896 -0.114799 0.671656 0.682845 -0.416869 0.767118 0.623514 -0.090453 0.656426 0.659746 -0.414482 0.737569 -0.008148 -0.024877 0.052728 -0.006272 -0.033038 0.008519 -0.020555 -0.047180 0.076479 0.052659 -0.017666 -0.033850
mean_3m_price_p2_fix NaN 0.248440 0.088850 0.384732 0.218326 0.473837 -0.004390 0.771322 -0.729676 0.808941 -0.582594 0.077070 0.434744 0.196483 0.196563 0.029533 0.244695 -0.048468 0.546339 -0.730093 0.822816 0.987424 -0.581263 0.994948 0.984489 -0.717170 0.816497 0.989385 -0.572146 0.997450 0.987017 -0.711693 0.813394 0.991672 -0.554858 1.000000 0.989498 0.052446 -0.187894 -0.079112 -0.911598 0.205166 -0.961060 -0.981270 0.960337 -0.944633 -0.900975 0.172490 -0.916833 -0.939551 0.954094 -0.906708 -0.052884 -0.045045 -0.049004 -0.027867 0.036285 -0.013497 0.065191 0.011189 -0.096589 -0.091174 -0.006731 0.085874
mean_3m_price_p3_fix NaN 0.243398 0.090229 0.381143 0.213306 0.469765 -0.002254 0.767954 -0.722141 0.810771 -0.647570 0.079034 0.430589 0.212245 0.212325 0.031877 0.224993 -0.048303 0.529478 -0.720332 0.824784 0.988819 -0.650396 0.984719 0.995050 -0.708239 0.818476 0.990686 -0.639863 0.986911 0.997482 -0.703306 0.815353 0.992993 -0.619510 0.989498 1.000000 0.046437 -0.189211 -0.078911 -0.910035 0.206886 -0.958288 -0.986207 0.912789 -0.972254 -0.900214 0.171981 -0.916281 -0.945983 0.906215 -0.933227 -0.051463 -0.044687 -0.046551 -0.031732 0.033923 -0.013945 0.064347 0.010539 -0.094297 -0.090452 -0.005509 0.084341
churn NaN -0.020351 -0.023863 -0.020197 0.007649 0.004749 0.016569 0.038398 -0.027420 0.030926 0.005259 -0.023903 0.002832 0.104747 0.104809 -0.012798 0.047705 -0.089442 0.045617 -0.024367 0.031242 0.051389 0.003618 0.054609 0.048268 -0.025368 0.030647 0.050784 0.002885 0.054167 0.047981 -0.026152 0.030509 0.049234 0.000919 0.052446 0.046437 1.000000 -0.004938 0.003981 -0.033522 -0.009880 -0.044801 -0.046753 0.063215 -0.037384 -0.031032 -0.004537 -0.037194 -0.039721 0.063200 -0.030601 -0.089871 -0.090299 0.004463 -0.063759 0.001038 -0.019601 0.071212 -0.072318 -0.004627 -0.090327 -0.030744 0.102576
offpeak_diff_dec_january_energy NaN -0.069082 -0.010695 -0.073933 -0.041188 -0.085000 0.427082 -0.138425 0.520849 -0.085036 0.257962 -0.002954 -0.069673 0.223785 0.223065 0.071861 -0.005951 -0.027536 -0.092034 0.373535 -0.100589 -0.166182 0.223553 -0.174776 -0.176561 0.492320 -0.088697 -0.167338 0.269564 -0.182675 -0.184278 0.551663 -0.080187 -0.168830 0.305390 -0.187894 -0.189211 -0.004938 1.000000 0.527924 0.203916 0.032733 0.257669 0.196667 -0.162096 0.202587 0.225120 0.050493 0.291948 0.208231 -0.152438 0.215584 -0.069605 -0.043952 -0.036147 -0.006001 0.123733 0.023432 -0.028421 -0.009676 0.033512 -0.018624 0.013623 0.006536
offpeak_diff_dec_january_power NaN -0.092476 -0.012580 -0.045920 -0.106918 -0.050024 0.074153 -0.069200 0.254693 -0.070223 0.216953 -0.009573 -0.048173 0.178105 0.177966 0.005834 -0.044490 -0.025019 -0.049445 0.213962 -0.060631 -0.079505 0.179565 -0.081622 -0.080056 0.260501 -0.060626 -0.085197 0.247727 -0.086833 -0.085814 0.309495 -0.052229 -0.080010 0.367258 -0.079112 -0.078911 0.003981 0.527924 1.000000 0.119273 -0.006543 0.136767 0.106882 -0.080072 0.113601 0.138729 0.001650 0.161482 0.123407 -0.077042 0.133373 -0.005744 -0.015404 0.054127 0.001119 -0.016148 0.005314 -0.010266 -0.012180 0.025074 0.009481 -0.006961 -0.003283
off_peak_peak_var_mean_diff NaN -0.308118 -0.094985 -0.413326 -0.233357 -0.463874 0.072547 -0.721582 0.663271 -0.942846 0.642567 -0.084028 -0.426788 -0.102608 -0.102807 -0.032681 -0.228043 -0.012848 -0.494572 0.668039 -0.955357 -0.915918 0.647253 -0.916602 -0.914857 0.658479 -0.949312 -0.913724 0.640273 -0.914888 -0.913287 0.649890 -0.943579 -0.910214 0.623896 -0.911598 -0.910035 -0.033522 0.203916 0.119273 1.000000 -0.531376 0.887949 0.926358 -0.870341 0.907600 0.990824 -0.453824 0.846350 0.884207 -0.865541 0.868266 0.000430 -0.013485 0.047823 -0.030097 -0.036745 0.017090 -0.013582 -0.094292 0.111822 0.027757 -0.014406 -0.014050
peak_mid_peak_var_mean_diff NaN 0.229332 0.047079 0.231979 0.142249 0.177668 0.033423 0.202681 0.143750 0.725950 -0.128435 0.046906 0.177187 0.024157 0.024262 0.036360 0.099919 0.084455 0.104328 0.164490 0.713907 0.210255 -0.119957 0.206809 0.208486 0.156722 0.721219 0.209767 -0.115632 0.206041 0.207761 0.153853 0.721646 0.208779 -0.114799 0.205166 0.206886 -0.009880 0.032733 -0.006543 -0.531376 1.000000 -0.082201 -0.203793 0.192598 -0.199173 -0.525185 0.857822 -0.058894 -0.185578 0.193091 -0.181804 0.062773 0.081716 -0.026134 0.108757 0.040614 -0.011445 -0.109953 0.192823 -0.047378 0.122445 0.039733 -0.137970
off_peak_mid_peak_var_mean_diff NaN -0.237974 -0.086185 -0.360309 -0.197301 -0.449263 0.103495 -0.738864 0.858354 -0.715069 0.686218 -0.073388 -0.405894 -0.107597 -0.107775 -0.018706 -0.214032 0.030739 -0.525197 0.875224 -0.736326 -0.963379 0.696334 -0.966055 -0.963091 0.859759 -0.725244 -0.961062 0.690470 -0.964455 -0.961638 0.848098 -0.718268 -0.957469 0.671656 -0.961060 -0.958288 -0.044801 0.257669 0.136767 0.887949 -0.082201 1.000000 0.979170 -0.919346 0.959610 0.880516 -0.068159 0.963714 0.939470 -0.913432 0.922765 0.034587 0.028502 0.042073 0.023640 -0.021178 0.013891 -0.075676 -0.006240 0.105830 0.099135 0.004625 -0.091439
off_peak_peak_fix_mean_diff NaN -0.246618 -0.089491 -0.378373 -0.200732 -0.465742 0.016424 -0.761439 0.766239 -0.803159 0.696519 -0.078256 -0.422556 -0.175469 -0.175559 -0.030081 -0.225770 0.035823 -0.531066 0.777432 -0.822036 -0.987415 0.710317 -0.986797 -0.991546 0.761760 -0.812498 -0.985017 0.701036 -0.984855 -0.989808 0.753587 -0.805883 -0.981264 0.682845 -0.981270 -0.986207 -0.046753 0.196667 0.106882 0.926358 -0.203793 0.979170 1.000000 -0.924921 0.986129 0.915433 -0.168420 0.936690 0.959180 -0.919178 0.946980 0.043537 0.033282 0.050291 0.024488 -0.038919 0.014385 -0.061292 -0.020236 0.099996 0.089662 0.002776 -0.081645
peak_mid_peak_fix_mean_diff NaN 0.240317 0.081020 0.365809 0.212432 0.451490 -0.008172 0.739977 -0.702097 0.762179 -0.431536 0.069417 0.414621 0.157458 0.157535 0.023443 0.259333 -0.046407 0.546181 -0.712644 0.779385 0.936672 -0.429624 0.965025 0.917252 -0.696871 0.770769 0.934847 -0.424326 0.963999 0.916104 -0.688577 0.765010 0.931251 -0.416869 0.960337 0.912789 0.063215 -0.162096 -0.080072 -0.870341 0.192598 -0.919346 -0.924921 1.000000 -0.848993 -0.857264 0.164717 -0.872314 -0.882372 0.995542 -0.814295 -0.053500 -0.043602 -0.050900 -0.022125 0.041711 -0.012900 0.063279 0.012999 -0.095862 -0.087254 -0.008096 0.083388
off_peak_mid_peak_fix_mean_diff NaN -0.237865 -0.089014 -0.366207 -0.186259 -0.450238 0.019261 -0.735287 0.758497 -0.783583 0.779720 -0.078465 -0.406308 -0.175147 -0.175239 -0.031576 -0.200584 0.029531 -0.499692 0.769451 -0.802309 -0.963506 0.799733 -0.950267 -0.977727 0.754554 -0.792813 -0.960970 0.789146 -0.948017 -0.975812 0.746815 -0.786132 -0.957322 0.767118 -0.944633 -0.972254 -0.037384 0.202587 0.113601 0.907600 -0.199173 0.959610 0.986129 -0.848993 1.000000 0.898124 -0.162179 0.921099 0.947968 -0.842956 0.960733 0.037156 0.027223 0.047678 0.024378 -0.035884 0.014362 -0.057565 -0.022452 0.097136 0.086530 0.000323 -0.077074
off_peak_peak_var_max_monthly_diff NaN -0.306261 -0.093136 -0.409099 -0.233242 -0.459979 0.094195 -0.712991 0.671506 -0.932682 0.644297 -0.082687 -0.422948 -0.091141 -0.091353 -0.028123 -0.221083 -0.021747 -0.488461 0.670525 -0.943171 -0.903773 0.646273 -0.904261 -0.903286 0.663735 -0.937963 -0.902119 0.640796 -0.903138 -0.902369 0.657514 -0.932933 -0.899688 0.623514 -0.900975 -0.900214 -0.031032 0.225120 0.138729 0.990824 -0.525185 0.880516 0.915433 -0.857264 0.898124 1.000000 -0.416295 0.870696 0.901768 -0.846248 0.886314 -0.011529 -0.022407 0.047543 -0.044974 -0.020487 0.017745 -0.011783 -0.097560 0.112311 0.024003 -0.012544 -0.012078
peak_mid_peak_var_max_monthly_diff NaN 0.193561 0.036804 0.196057 0.120706 0.145321 0.041106 0.165645 0.126405 0.609845 -0.096320 0.036487 0.146047 0.029536 0.029648 0.027692 0.091578 0.058297 0.092617 0.138393 0.608886 0.175448 -0.089642 0.173096 0.172571 0.135619 0.613049 0.176225 -0.087424 0.173093 0.172476 0.134265 0.615104 0.175511 -0.090453 0.172490 0.171981 -0.004537 0.050493 0.001650 -0.453824 0.857822 -0.068159 -0.168420 0.164717 -0.162179 -0.416295 1.000000 0.008404 -0.092847 0.176063 -0.090047 0.042890 0.056975 -0.010794 0.073292 0.031602 -0.011392 -0.088390 0.156937 -0.038637 0.110758 0.039143 -0.127103
off_peak_mid_peak_var_max_monthly_diff NaN -0.225558 -0.080449 -0.341705 -0.191796 -0.430314 0.138286 -0.706956 0.852115 -0.669854 0.674773 -0.069577 -0.388085 -0.078987 -0.079181 -0.010856 -0.192561 0.015726 -0.497001 0.860997 -0.691198 -0.918704 0.681811 -0.920534 -0.919749 0.850255 -0.680389 -0.916096 0.677123 -0.918814 -0.918321 0.842641 -0.673948 -0.913754 0.656426 -0.916833 -0.916281 -0.037194 0.291948 0.161482 0.846350 -0.058894 0.963714 0.936690 -0.872314 0.921099 0.870696 0.008404 1.000000 0.968498 -0.855284 0.952830 0.015102 0.013364 0.037271 0.004350 0.002807 0.012425 -0.067257 -0.012020 0.101387 0.089084 0.007089 -0.084316
off_peak_peak_fix_max_monthly_diff NaN -0.233610 -0.083611 -0.360426 -0.199130 -0.449388 0.021593 -0.733256 0.739865 -0.763882 0.679188 -0.075130 -0.407740 -0.155153 -0.155237 -0.028812 -0.208609 0.025588 -0.502044 0.748774 -0.781965 -0.945795 0.689289 -0.944682 -0.950917 0.734743 -0.773348 -0.942883 0.679114 -0.942160 -0.948696 0.727449 -0.767966 -0.940078 0.659746 -0.939551 -0.945983 -0.039721 0.208231 0.123407 0.884207 -0.185578 0.939470 0.959180 -0.882372 0.947968 0.901768 -0.092847 0.968498 1.000000 -0.864835 0.987862 0.035112 0.024945 0.054764 0.018366 -0.036772 0.009916 -0.051333 -0.023107 0.092485 0.082410 0.004662 -0.076583
peak_mid_peak_fix_max_monthly_diff NaN 0.241873 0.082329 0.364341 0.212421 0.447746 -0.005071 0.736031 -0.695747 0.759631 -0.426496 0.069884 0.411407 0.157086 0.157164 0.023152 0.266221 -0.048888 0.543556 -0.707167 0.775702 0.931142 -0.425158 0.959447 0.911224 -0.690381 0.766106 0.927988 -0.420096 0.957014 0.908661 -0.682581 0.761157 0.925040 -0.414482 0.954094 0.906215 0.063200 -0.152438 -0.077042 -0.865541 0.193091 -0.913432 -0.919178 0.995542 -0.842956 -0.846248 0.176063 -0.855284 -0.864835 1.000000 -0.797295 -0.057861 -0.046120 -0.052107 -0.027541 0.051892 -0.012590 0.063193 0.013711 -0.097144 -0.088172 -0.005386 0.082224
off_peak_mid_peak_fix_max_monthly_diff NaN -0.227262 -0.083103 -0.350454 -0.187308 -0.436261 0.025368 -0.708900 0.736335 -0.746201 0.757334 -0.075254 -0.393848 -0.151846 -0.151932 -0.029954 -0.192748 0.019178 -0.473925 0.744422 -0.764235 -0.924010 0.772011 -0.911615 -0.938048 0.731223 -0.755714 -0.921219 0.760321 -0.909139 -0.935864 0.724143 -0.750336 -0.918524 0.737569 -0.906708 -0.933227 -0.030601 0.215584 0.133373 0.868266 -0.181804 0.922765 0.946980 -0.814295 0.960733 0.886314 -0.090047 0.952830 0.987862 -0.797295 1.000000 0.029869 0.019196 0.056104 0.017558 -0.036689 0.009711 -0.047434 -0.026142 0.090455 0.079909 0.001984 -0.072406
tenure NaN -0.001782 -0.037000 -0.010212 -0.064903 -0.043288 -0.125695 -0.021607 -0.022696 -0.006468 0.005188 -0.037820 -0.048613 -0.132935 -0.133159 -0.044088 -0.062418 0.903659 -0.044448 -0.017387 -0.007428 -0.059740 -0.017800 -0.054174 -0.052940 -0.026183 -0.006922 -0.060002 -0.020838 -0.053371 -0.052090 -0.026518 -0.007278 -0.059581 -0.008148 -0.052884 -0.051463 -0.089871 -0.069605 -0.005744 0.000430 0.062773 0.034587 0.043537 -0.053500 0.037156 -0.011529 0.042890 0.015102 0.035112 -0.057861 0.029869 1.000000 0.955498 0.270993 0.376339 -0.283319 -0.074306 -0.060265 0.232996 -0.106371 0.414698 -0.061182 -0.322592
months_activ NaN 0.024443 -0.032873 0.014248 -0.051330 -0.028887 -0.077109 -0.018337 -0.018111 0.010784 -0.023514 -0.034324 -0.033826 -0.080712 -0.080952 -0.036940 -0.052375 0.970263 -0.034423 -0.018424 0.009144 -0.051500 -0.032158 -0.045681 -0.045470 -0.021215 0.010111 -0.051184 -0.033933 -0.045367 -0.045139 -0.020066 0.010057 -0.050784 -0.024877 -0.045045 -0.044687 -0.090299 -0.043952 -0.015404 -0.013485 0.081716 0.028502 0.033282 -0.043602 0.027223 -0.022407 0.056975 0.013364 0.024945 -0.046120 0.019196 0.955498 1.000000 0.041888 0.424012 -0.052556 -0.066711 -0.066047 0.260298 -0.135293 0.410911 -0.054397 -0.324195
months_to_end NaN -0.069864 0.003955 -0.072788 -0.041631 -0.046736 -0.067661 -0.034449 0.021421 -0.048325 0.098508 0.002174 -0.046561 -0.234475 -0.234395 0.000461 -0.038522 -0.060325 -0.051153 0.021410 -0.049872 -0.049076 0.036194 -0.049518 -0.047293 0.010765 -0.049639 -0.051563 0.034926 -0.048921 -0.046541 0.011259 -0.049086 -0.051487 0.052728 -0.049004 -0.046551 0.004463 -0.036147 0.054127 0.047823 -0.026134 0.042073 0.050291 -0.050900 0.047678 0.047543 -0.010794 0.037271 0.054764 -0.052107 0.056104 0.270993 0.041888 1.000000 -0.187818 -0.868334 -0.053712 0.004115 -0.041915 0.087093 0.050881 -0.033210 -0.021057
months_modif_prod NaN 0.131206 0.007145 0.099014 -0.068461 -0.019037 -0.191101 -0.022078 -0.006928 0.036395 -0.027601 -0.001392 -0.023823 0.003023 0.003181 -0.030133 -0.020199 0.444468 -0.009237 -0.006834 0.034023 -0.038033 -0.009258 -0.030298 -0.033887 -0.008263 0.035279 -0.035416 -0.009934 -0.028539 -0.032316 -0.004742 0.035744 -0.034919 -0.006272 -0.027867 -0.031732 -0.063759 -0.006001 0.001119 -0.030097 0.108757 0.023640 0.024488 -0.022125 0.024378 -0.044974 0.073292 0.004350 0.018366 -0.027541 0.017558 0.376339 0.424012 -0.187818 1.000000 0.086435 -0.008488 -0.117635 0.260248 -0.111874 0.237041 0.002460 -0.211761
months_renewal NaN 0.053129 -0.001304 0.052954 0.048385 0.040997 0.191076 0.034975 0.045929 0.056784 -0.073259 0.001964 0.043987 0.243378 0.243163 0.033557 0.035024 0.038701 0.037946 0.020629 0.053041 0.042113 -0.021242 0.039878 0.037691 0.044688 0.055073 0.043921 -0.015959 0.037257 0.034976 0.049878 0.057283 0.043160 -0.033038 0.036285 0.033923 0.001038 0.123733 -0.016148 -0.036745 0.040614 -0.021178 -0.038919 0.041711 -0.035884 -0.020487 0.031602 0.002807 -0.036772 0.051892 -0.036689 -0.283319 -0.052556 -0.868334 0.086435 1.000000 0.060023 -0.005056 0.020266 -0.067288 -0.058676 0.039018 0.023713
channel_ewpakwlliwisiwduibdlfmalxowmwpci NaN -0.091575 -0.004010 -0.087119 -0.027902 -0.046262 -0.015067 -0.007484 0.008797 -0.016463 0.001886 0.003054 -0.045627 0.024831 0.024883 -0.005054 -0.027863 -0.055191 -0.017853 0.006681 -0.018207 -0.016419 0.009300 -0.014407 -0.014807 0.008749 -0.017429 -0.015600 0.008518 -0.013989 -0.014415 0.009390 -0.017330 -0.014971 0.008519 -0.013497 -0.013945 -0.019601 0.023432 0.005314 0.017090 -0.011445 0.013891 0.014385 -0.012900 0.014362 0.017745 -0.011392 0.012425 0.009916 -0.012590 0.009711 -0.074306 -0.066711 -0.053712 -0.008488 0.060023 1.000000 -0.382224 -0.135030 -0.113685 0.130460 0.144969 -0.222134
channel_foosdfpfkusacimwkcsosbicdxkicaua NaN -0.094734 -0.042855 -0.029073 0.002085 0.053469 0.041276 0.043715 -0.071438 -0.015435 -0.019017 -0.041562 0.048269 0.017309 0.017463 -0.021126 0.010997 -0.077413 0.037618 -0.073199 -0.012485 0.069043 -0.021279 0.065604 0.064938 -0.071737 -0.015015 0.068330 -0.019502 0.065331 0.064637 -0.072142 -0.015558 0.068227 -0.020555 0.065191 0.064347 0.071212 -0.028421 -0.010266 -0.013582 -0.109953 -0.075676 -0.061292 0.063279 -0.057565 -0.011783 -0.088390 -0.067257 -0.051333 0.063193 -0.047434 -0.060265 -0.066047 0.004115 -0.117635 -0.005056 -0.382224 1.000000 -0.577842 -0.486499 -0.347818 -0.196569 0.452078
channel_lmkebamcaaclubfxadlmueccxoimlema NaN 0.341366 0.067439 0.230049 0.079778 0.037262 -0.068687 0.018631 -0.006367 0.114532 -0.055722 0.052856 0.040859 -0.040670 -0.040964 0.018388 0.057804 0.272951 0.018000 -0.003652 0.113643 0.007014 -0.048636 0.012086 0.011226 -0.005009 0.114679 0.006931 -0.050259 0.011336 0.010495 -0.003627 0.113502 0.006624 -0.047180 0.011189 0.010539 -0.072318 -0.009676 -0.012180 -0.094292 0.192823 -0.006240 -0.020236 0.012999 -0.022452 -0.097560 0.156937 -0.012020 -0.023107 0.013711 -0.026142 0.232996 0.260298 -0.041915 0.260248 0.020266 -0.135030 -0.577842 1.000000 -0.171867 0.261346 0.196152 -0.374806
channel_usilxuppasemubllopkaafesmlibmsdf NaN -0.170963 -0.009229 -0.144890 -0.070993 -0.082562 0.030615 -0.077296 0.104883 -0.092951 0.089576 -0.000205 -0.079644 0.000585 0.000648 0.015661 -0.058315 -0.143942 -0.059891 0.105954 -0.094421 -0.094566 0.078199 -0.097026 -0.094793 0.103673 -0.092655 -0.094258 0.078152 -0.096271 -0.093998 0.102321 -0.090881 -0.094548 0.076479 -0.096589 -0.094297 -0.004627 0.033512 0.025074 0.111822 -0.047378 0.105830 0.099996 -0.095862 0.097136 0.112311 -0.038637 0.101387 0.092485 -0.097144 0.090455 -0.106371 -0.135293 0.087093 -0.111874 -0.067288 -0.113685 -0.486499 -0.171867 1.000000 0.106401 -0.060783 -0.049843
origin_up_kamkkxfxxuwbdslkwifmmcsiusiuosws NaN 0.047390 -0.008125 -0.020708 -0.059900 -0.099061 -0.074705 -0.050595 0.082512 0.002831 0.066178 -0.020315 -0.096117 -0.044511 -0.044416 -0.017748 -0.057064 0.389631 -0.077230 0.089631 0.001708 -0.093915 0.053349 -0.090857 -0.090144 0.081036 0.002768 -0.094464 0.049808 -0.091132 -0.090457 0.078202 0.001988 -0.094843 0.052659 -0.091174 -0.090452 -0.090327 -0.018624 0.009481 0.027757 0.122445 0.099135 0.089662 -0.087254 0.086530 0.024003 0.110758 0.089084 0.082410 -0.088172 0.079909 0.414698 0.410911 0.050881 0.237041 -0.058676 0.130460 -0.347818 0.261346 0.106401 1.000000 -0.246416 -0.706252
origin_up_ldkssxwpmemidmecebumciepifcamkci NaN 0.005554 0.002300 -0.028918 0.008405 -0.029786 -0.014981 -0.000226 -0.000745 0.018001 -0.026341 0.009766 -0.028076 -0.029678 -0.030075 0.001124 -0.000987 -0.036552 -0.027328 -0.000567 0.017359 -0.007013 -0.015422 -0.006741 -0.005798 0.000391 0.016425 -0.007413 -0.017303 -0.007606 -0.006603 0.002147 0.017201 -0.006298 -0.017666 -0.006731 -0.005509 -0.030744 0.013623 -0.006961 -0.014406 0.039733 0.004625 0.002776 -0.008096 0.000323 -0.012544 0.039143 0.007089 0.004662 -0.005386 0.001984 -0.061182 -0.054397 -0.033210 0.002460 0.039018 0.144969 -0.196569 0.196152 -0.060783 -0.246416 1.000000 -0.511836
origin_up_lxidpiddsbxsbosboudacockeimpuepw NaN -0.046032 0.005605 0.039364 0.046895 0.109370 0.077191 0.044973 -0.072813 -0.015803 -0.039487 0.010960 0.105503 0.061287 0.061492 0.014986 0.051302 -0.318406 0.088470 -0.079271 -0.014324 0.088510 -0.036116 0.085600 0.084280 -0.072338 -0.014587 0.089288 -0.031597 0.086476 0.085145 -0.071111 -0.014469 0.088809 -0.033850 0.085874 0.084341 0.102576 0.006536 -0.003283 -0.014050 -0.137970 -0.091439 -0.081645 0.083388 -0.077074 -0.012078 -0.127103 -0.084316 -0.076583 0.082224 -0.072406 -0.322592 -0.324195 -0.021057 -0.211761 0.023713 -0.222134 0.452078 -0.374806 -0.049843 -0.706252 -0.511836 1.000000

5. Modelling¶

We now have a dataset containing features that we have engineered and we are ready to start training a predictive model. Remember, we only need to focus on training a Random Forest classifier.

In [65]:
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

Data sampling¶

The first thing we want to do is split our dataset into training and test samples. The reason why we do this, is so that we can simulate a real life situation by generating predictions for our test sample, without showing the predictive model these data points. This gives us the ability to see how well our model is able to generalise to new data, which is critical.

A typical % to dedicate to testing is between 20-30, for this example we will use a 75-25% split between train and test respectively.

In [67]:
# Make a copy of our data
train_df = df.copy()

# Separate target variable from independent variables
y = df['churn'] #target
X = df.drop(columns=['id', 'churn'])
print(X.shape)
print(y.shape)
(10830, 62)
(10830,)
In [68]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
(8122, 62)
(8122,)
(2708, 62)
(2708,)
In [69]:
model = RandomForestClassifier(
    n_estimators=1000
)
model.fit(X_train, y_train)
Out[69]:
RandomForestClassifier(n_estimators=1000)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(n_estimators=1000)

Evaluation¶

In [70]:
predictions = model.predict(X_test)
tn, fp, fn, tp = metrics.confusion_matrix(y_test, predictions).ravel()

This code is used to calculate various metrics related to a confusion matrix, which is often used in binary classification problems to evaluate the performance of a machine learning model. Here's an explanation of what's happening:

  1. predictions = model.predict(X_test): This line is making predictions on the test data (X_test) using a machine learning model (model). It's predicting the target variable based on the features in X_test. These predictions are stored in the predictions variable.

  2. tn, fp, fn, tp = metrics.confusion_matrix(y_test, predictions).ravel(): In this line, the confusion matrix is calculated using the metrics.confusion_matrix function from a library like scikit-learn. The confusion matrix is a 2x2 table that summarizes the model's performance on a binary classification task. The four elements of the matrix are typically:

    • True Negatives (tn): The number of instances that are actually negative (0) and are correctly predicted as negative.
    • False Positives (fp): The number of instances that are actually negative but are incorrectly predicted as positive (1).
    • False Negatives (fn): The number of instances that are actually positive (1) but are incorrectly predicted as negative.
    • True Positives (tp): The number of instances that are actually positive and are correctly predicted as positive.

    The .ravel() method is used to unpack these four values into individual variables, making them more accessible for further calculations.

After running this code, you will have the True Negatives (tn), False Positives (fp), False Negatives (fn), and True Positives (tp), which can be used to calculate various evaluation metrics like accuracy, precision, recall, and F1-score for the machine learning model's performance on the test data.

In [71]:
y_test.value_counts()
Out[71]:
churn
0    2434
1     274
Name: count, dtype: int64
In [72]:
print(f"True positives: {tp}")
print(f"False positives: {fp}")
print(f"True negatives: {tn}")
print(f"False negatives: {fn}\n")

print(f"Accuracy: {metrics.accuracy_score(y_test, predictions)}")
print(f"Precision: {metrics.precision_score(y_test, predictions)}")
print(f"Recall: {metrics.recall_score(y_test, predictions)}")
True positives: 16
False positives: 2
True negatives: 2432
False negatives: 258

Accuracy: 0.9039881831610044
Precision: 0.8888888888888888
Recall: 0.058394160583941604

In the test set, we have around 10% of customers labeled as churners (churn = 1). Here's a breakdown of how the model performed:

  • True Negatives (TN): We correctly identified 3282 out of 3286 non-churners (churn = 0), which is excellent.

  • False Negatives (FN): We predicted 348 clients as non-churners (churn = 0) when they actually churned (churn = 1). This number is relatively high and needs improvement.

  • False Positives (FP): We incorrectly predicted 4 clients as churners when they did not churn, which is quite good.

  • True Positives (TP): Out of 366 clients who actually churned in the test dataset, we correctly identified only 18, which is a poor result.

  • Accuracy: The accuracy score is high but not very informative due to the class imbalance, and it doesn't reveal the whole story.

  • Precision: The precision score is 0.82, which is reasonable but has room for improvement.

  • Recall: The recall score indicates the classifier's poor ability to identify positive samples, which is a significant concern.

The model excels in identifying non-churners but struggles to predict cases where clients do churn. A high percentage of clients who should be identified as churners are being classified as non-churners. This suggests that the current set of features may not be distinct enough to differentiate between churners and non-churners. A data scientist would typically revisit feature engineering and experiment with model parameter optimization to enhance performance.

Features importance¶

In [73]:
feature_importances = pd.DataFrame({
    'features': X_train.columns,
    'importance': model.feature_importances_
}).sort_values(by='importance', ascending=True).reset_index()
In [74]:
plt.figure(figsize=(15, 25))
plt.title('Feature Importances')
plt.barh(range(len(feature_importances)), feature_importances['importance'], color='b', align='center')
plt.yticks(range(len(feature_importances)), feature_importances['features'])
plt.xlabel('Importance')
plt.show()
No description has been provided for this image

The insights from this chart are as follows:

  • High net margin and 12-month consumption play a significant role in predicting churn.
  • Margin on power subscription is another influential factor.
  • Time-related factors, such as the duration of customer activity (tenure) and the time since their contract update, are crucial.
  • Some features recommended by our colleague rank high, and certain derived features perform even better.
  • Price sensitivity features, although scattered, are not the primary drivers of customer churn.

This observation is noteworthy as it relates to our initial hypothesis:

Does customer churn correlate with price sensitivity?

According to the feature importances, price sensitivity is not a primary driver but has a modest impact. To reach a conclusive result, further experimentation is necessary.

In [75]:
proba_predictions = model.predict_proba(X_test)
probabilities = proba_predictions[:, 1]
In [76]:
X_test = X_test.reset_index()
X_test.drop(columns='index', inplace=True)
In [77]:
X_test['churn'] = predictions.tolist()
X_test['churn_probability'] = probabilities.tolist()
X_test.to_csv('C:/Users/Tasnim/Desktop/dataUsed2/out_of_sample_data_with_predictions.csv')
In [ ]: